import os 
os.environ["OMP_NUM_THREADS"]="1"  #pour régler la configuration ( corrige la cause technique )
import warnings 
warnings.filterwarnings("ignore", message="Kmeans is known to have a memory leak") #masquer le message d'avertissement 

import numpy as np
import pandas as pd 
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score

from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
import matplotlib.pyplot as plt

import seaborn as sns

# Load the input table from CSV.
df_finale = pd.read_csv("Bouzouita_Hayette_df_finale_p9.csv")

df_finale.head()

# Column selection for the analysis (identifiers excluded).

# Identifier columns are kept aside so they can be re-attached to the results.
ids = df_finale.loc[:, ["Code zone", "Pays"]].copy()

# Columns fed to the clustering steps.
cols_to_analyse = [
    "Importations - Quantité",
    "TDI_avicole (%)",
    "taux de croissance du PIB (%)",
    "RNB/hab (USD)",
    "Quantité de produits avicoles (kg/hab/an) ",
    "Part avicole (%)",
    "CAGR (%/an), 2000-2018",
    "Aliments avicoles (Milliers de tonnes)",
    "Pop_2017",
    "Surface agricole par hab (ha/hab)",
    "Stabilité politique",
]

X = df_finale.loc[:, cols_to_analyse].copy()

# Standardise every column (z-score) so that no variable dominates the
# distances because of its unit.
scaler = StandardScaler().fit(X)
X_scaled_np = scaler.transform(X)

print(X_scaled_np[:2])

# Back to a DataFrame that shares df_finale's index and the original column
# names, which keeps it readable and easy to concatenate with `ids`.
X_scaled = pd.DataFrame(
    data=X_scaled_np,
    columns=cols_to_analyse,
    index=df_finale.index,
)

X_scaled.head()

[[-0.30802385  0.21915831  0.25245639 -0.52943792  0.0166037  -0.01239645
  -1.31731422 -0.21887677 -0.26191189 -0.20633399 -0.63576152]
 [-0.09694981  0.75478959 -0.52212017 -0.73020191 -1.30705822 -0.96904691
   1.54838726 -0.21667196 -0.04769655 -0.07437158 -3.08559831]]

# Re-attach the identifiers to the standardised features (both frames share
# df_finale's index) so each row can still be traced back to its country.
df_ready = ids.join(X_scaled)
df_ready.head()

# 1) Hierarchical linkage (Euclidean distance; Ward minimises the
#    within-cluster variance).
Z = linkage(X_scaled, "ward")

# Settings shared by the two full dendrograms.
pays_labels = ids["Pays"].to_numpy()
leaf_style = dict(leaf_rotation=90, leaf_font_size=8)

# Full dendrogram, no cut.
plt.figure(figsize=(23, 6))
dendrogram(Z, labels=pays_labels, **leaf_style)
plt.title("dendrogramme - CAH")
plt.xlabel("Pays")
plt.ylabel("Distance")
plt.show()

# Subjective, visual choice of the cut height:
# - cutting around 13-14 gives 6 clusters;
# - cutting too high (around 20, say) leaves only 3 clusters, which is not
#   informative (the 3 giant countries stand out, the rest is blurry);
# - cutting too low gives far too many small clusters.

# Cut height on the distance axis (author's note: big jump from 12.6 to 14.2).
thr = 13

# Same dendrogram, coloured below the cut and with the cut line drawn.
plt.figure(figsize=(20, 6))
dendrogram(Z, labels=pays_labels, color_threshold=thr, **leaf_style)
plt.axhline(y=thr, color="red", linestyle="--", linewidth=1.5)
plt.title("dendrogramme - CAH")
plt.xlabel("Pays")
plt.ylabel("Distance")

plt.savefig("dendrogramme.png", dpi=300, bbox_inches="tight")
plt.show()

# Truncated view: only the last p merged groups are drawn
# (p = number of clusters wanted).
plt.figure(figsize=(12, 6))
dendrogram(Z, truncate_mode="lastp", p=6, show_contracted=False)
plt.title("Dendrogramme (tronqué) - vue des grands regroupements (clusters)")
plt.ylabel("Distance")

plt.savefig("DendrogrammeTronquéNBcluster.png", dpi=300, bbox_inches="tight")
plt.show()

# Confirmation of the choice of 6 clusters: plot the merge heights of the
# last fusions and look for the largest jump.

# Heights (third column of the linkage matrix) of the last 50 merges; the
# slice simply returns fewer values when Z has fewer than 50 rows.
last = Z[-50:, 2]
steps = np.arange(1, last.shape[0] + 1)

plt.figure(figsize=(6, 4))
plt.plot(steps, last, marker="o")
# The title reports the number of merges actually plotted; it used to be
# hard-coded to 30 while 50 points were drawn.
plt.title(f"Sauts de distance ({last.shape[0]} dernieres fusions)")
plt.xlabel("Etapes de fusion (fin d'algorithme -> droite)")
plt.ylabel("Distance de fusion")
plt.grid(True)

# Optional cut line, kept disabled:
#plt.axhline(y=12.80, color="red", linestyle="--")

plt.show()

# Cut the hierarchy into 6 flat clusters.

k = 6  # number of clusters requested
clusters_cah = fcluster(Z, k, "maxclust")

# Identifier table extended with the CAH label of each country.
res_cah = ids.assign(Cluster_CAH=clusters_cah)

# List the countries of each cluster.

for clust, pays in res_cah.groupby("Cluster_CAH")["Pays"]:
    print(f"\nCluster {clust} ({len(pays)} pays) :", ", ".join(pays), sep="\n")

Cluster 1 (59 pays) :
Afghanistan, Algérie, Angola, Bangladesh, Belize, Myanmar, Cameroun, République centrafricaine, Sri Lanka, Tchad, Colombie, Azerbaïdjan, Équateur, Égypte, El Salvador, Guatemala, Guinée, Honduras, Indonésie, Iran (République islamique d'), Côte d'Ivoire, Jordanie, Kirghizistan, Kenya, Cambodge, République populaire démocratique de Corée, Liban, Madagascar, Malawi, Mali, Maroc, Mozambique, Népal, Nicaragua, Niger, Nigéria, Pakistan, Paraguay, Pérou, Philippines, Guinée-Bissau, Zimbabwe, Rwanda, Sénégal, Sierra Leone, Eswatini, Turkménistan, République-Unie de Tanzanie, Thaïlande, Togo, Tunisie, Turquie, Ouganda, Burkina Faso, Venezuela (République bolivarienne du), Viet Nam, Éthiopie, Yémen, Zambie

Cluster 2 (3 pays) :
Chine, continentale, Inde, États-Unis d'Amérique

Cluster 3 (9 pays) :
Allemagne, Chine - RAS de Hong-Kong, Iraq, Japon, Mexique, Pays-Bas, Arabie saoudite, Royaume-Uni de Grande-Bretagne et d'Irlande du Nord, Belgique

Cluster 4 (6 pays) :
Australie, Botswana, Kazakhstan, Mauritanie, Mongolie, Namibie

Cluster 5 (31 pays) :
Antigua-et-Barbuda, Bahamas, Îles Salomon, Cabo Verde, Congo, Bénin, Dominique, Polynésie française, Djibouti, Gabon, Gambie, Ghana, Kiribati, Grenade, Haïti, Israël, Koweït, Libéria, Maldives, Nouvelle-Calédonie, Vanuatu, Timor-Leste, Saint-Kitts-et-Nevis, Sainte-Lucie, Saint-Vincent-et-les Grenadines, Sao Tomé-et-Principe, Suriname, Tadjikistan, Oman, Émirats arabes unis, Samoa

Cluster 6 (57 pays) :
Arménie, Albanie, Argentine, Autriche, Barbade, Bolivie (État plurinational de), Brésil, Bulgarie, Canada, Chili, Costa Rica, Cuba, Chypre, Danemark, République dominicaine, Bélarus, Estonie, Fidji, Finlande, Géorgie, Bosnie-Herzégovine, Grèce, Guyana, Hongrie, Croatie, Islande, Irlande, Italie, Jamaïque, République de Corée, Lettonie, Lesotho, Lituanie, Malaisie, Malte, Maurice, République de Moldova, Macédoine du Nord, Nouvelle-Zélande, Norvège, Panama, Tchéquie, Pologne, Portugal, Roumanie, Fédération de Russie, Slovénie, Slovaquie, Afrique du Sud, Espagne, Suède, Suisse, Chine, Taiwan Province de, Trinité-et-Tobago, Ukraine, Uruguay, Luxembourg

# Mean profile of each CAH cluster on the standardised features.

# res_cah, X_scaled and X all carry df_finale's index, so the frames are
# concatenated as they are. Resetting the index on one side only (as was done
# before) would misalign rows whenever that index is not 0..n-1.
df_work = pd.concat([res_cah[["Cluster_CAH"]], X_scaled], axis=1)

profils_std = df_work.groupby("Cluster_CAH").mean().round(2)
profils_std.head(6)

# Mean profile of each CAH cluster on the original (unscaled) features.
# It gets its own name so it no longer overwrites the standardised profile.
df_work_orig = pd.concat([res_cah[["Cluster_CAH"]], X], axis=1)

profils_orig = df_work_orig.groupby("Cluster_CAH").mean().round(2)
profils_orig.head(6)

# Attach the CAH labels to the original data.
df_cah = df_finale.copy()
df_cah["Cluster_CAH"] = res_cah["Cluster_CAH"].values

# Columns to plot: everything except the identifiers and the label itself.
num_cols = [c for c in df_cah.columns if c not in ["Code zone", "Pays", "Cluster_CAH"]]

# Grid size derived from the number of plotted columns (ceil division), so
# the figure no longer breaks when there are more than 12 of them.
n_grid_cols = 3
rows = max(1, -(-len(num_cols) // n_grid_cols))
plt.figure(figsize=(18, rows*4))

# One boxplot per variable, split by CAH cluster.
for i, col in enumerate(num_cols, 1):
    plt.subplot(rows, n_grid_cols, i)
    sns.boxplot(x="Cluster_CAH", y=col, data=df_cah)
    plt.title(col, fontsize=10)
    plt.xlabel("")
    plt.ylabel("")

plt.tight_layout()
plt.savefig("boxplot_CAH.png",dpi=400, bbox_inches="tight")
plt.show()

# Range of cluster counts to evaluate.
k_values = range(2,10)
k_list = list(k_values)

inertias = []
sil_scores = []

# The loop variable is deliberately not called `k`, so the CAH cluster count
# stored in `k` earlier in the file is not overwritten by the scan.
for n_clusters in k_values:
    km = KMeans(n_clusters=n_clusters, n_init=20, random_state=0)
    labels = km.fit_predict(X_scaled)
    inertias.append(km.inertia_)                            # elbow criterion
    sil_scores.append(silhouette_score(X_scaled, labels))   # silhouette, in [-1, 1], higher is better

# Side-by-side plots: inertia (elbow) and silhouette against k.
fig, axes = plt.subplots(1, 2, figsize=(16,4))

axes[0].plot(k_list, inertias, marker='o')
axes[0].set_title("Méthode du coude (Inertie)")
axes[0].set_xlabel("k")
axes[0].set_ylabel("Inertie")
axes[0].grid(True, linestyle=':')

axes[1].plot(k_list, sil_scores, marker='o')
axes[1].set_title("Indice de silhouette")
axes[1].set_xlabel("k")
axes[1].set_ylabel("Silhouette")
axes[1].grid(True, linestyle=':')


plt.tight_layout()
plt.savefig("coude_silhouette.png",dpi=300, bbox_inches="tight")
plt.show()

# Summary table of both criteria.
scores_df = pd.DataFrame({
    "k": k_list,
    "inertie": inertias,
    "silhouette": sil_scores, })
scores_df

# Fit the final k-means partition with 5 clusters.
# NOTE(review): n_init is left at the library default here, whereas the scan
# over k earlier in this file used n_init=20 — confirm this is intended.
# Changing it could renumber the clusters, and later cells refer to k-means
# clusters by label id.
kmeans = KMeans(n_clusters=5, random_state=0).fit(X_scaled)
clusters_km = kmeans.labels_

# Identifier table (country + code) extended with the k-means label.
res_kmeans = ids.assign(Cluster_kmeans=clusters_km)

# Size of each cluster.
print(res_kmeans["Cluster_kmeans"].value_counts())

Cluster_kmeans
3    75
1    61
0    26
2     2
4     1
Name: count, dtype: int64

# List the countries of each k-means cluster.

for clust, pays in res_kmeans.groupby("Cluster_kmeans")["Pays"]:
    print(f"\nCluster {clust} ({len(pays)} pays) :", ", ".join(pays), sep="\n")

Cluster 0 (26 pays) :
Angola, Antigua-et-Barbuda, Îles Salomon, Cabo Verde, Congo, Bénin, Gabon, Gambie, Ghana, Kiribati, Guatemala, Haïti, Chine - RAS de Hong-Kong, Iraq, Jordanie, Koweït, Libéria, Maldives, Namibie, Vanuatu, Sao Tomé-et-Principe, Arabie saoudite, Suriname, Oman, Émirats arabes unis, Belgique

Cluster 1 (61 pays) :
Afghanistan, Algérie, Bangladesh, Bolivie (État plurinational de), Botswana, Belize, Myanmar, Cameroun, République centrafricaine, Tchad, Azerbaïdjan, Équateur, Égypte, Djibouti, Guinée, Honduras, Indonésie, Iran (République islamique d'), Côte d'Ivoire, Kazakhstan, Kirghizistan, Kenya, Cambodge, République populaire démocratique de Corée, Liban, Lesotho, Madagascar, Malawi, Mali, Mauritanie, Maroc, Mozambique, Népal, Nicaragua, Niger, Nigéria, Pakistan, Paraguay, Pérou, Philippines, Guinée-Bissau, Timor-Leste, Zimbabwe, Rwanda, Sénégal, Sierra Leone, Tadjikistan, Eswatini, Turkménistan, République-Unie de Tanzanie, Thaïlande, Togo, Tunisie, Turquie, Ouganda, Burkina Faso, Venezuela (République bolivarienne du), Viet Nam, Éthiopie, Yémen, Zambie

Cluster 2 (2 pays) :
Chine, continentale, Inde

Cluster 3 (75 pays) :
Arménie, Albanie, Argentine, Australie, Autriche, Bahamas, Barbade, Brésil, Bulgarie, Canada, Sri Lanka, Chili, Colombie, Costa Rica, Cuba, Chypre, Danemark, Dominique, République dominicaine, Bélarus, El Salvador, Estonie, Fidji, Finlande, Polynésie française, Géorgie, Allemagne, Bosnie-Herzégovine, Grèce, Grenade, Guyana, Hongrie, Croatie, Islande, Irlande, Israël, Italie, Jamaïque, Japon, République de Corée, Lettonie, Lituanie, Malaisie, Malte, Maurice, Mexique, République de Moldova, Pays-Bas, Nouvelle-Calédonie, Macédoine du Nord, Nouvelle-Zélande, Norvège, Panama, Tchéquie, Pologne, Portugal, Roumanie, Fédération de Russie, Saint-Kitts-et-Nevis, Sainte-Lucie, Saint-Vincent-et-les Grenadines, Slovénie, Slovaquie, Afrique du Sud, Espagne, Suède, Suisse, Chine, Taiwan Province de, Trinité-et-Tobago, Royaume-Uni de Grande-Bretagne et d'Irlande du Nord, Ukraine, États-Unis d'Amérique, Uruguay, Samoa, Luxembourg

Cluster 4 (1 pays) :
Mongolie

# Mean profile of each k-means cluster on the standardised features.
# res_kmeans, X_scaled and X all carry df_finale's index, so the frames are
# concatenated as they are; resetting the index on one side only would
# misalign rows whenever that index is not 0..n-1.
df_work = pd.concat([res_kmeans[["Cluster_kmeans"]], X_scaled], axis=1)
profils_km = df_work.groupby("Cluster_kmeans").mean().round(2)
profils_km.head(6)

# Mean profile of each k-means cluster on the original (unscaled) features.
# Stored under separate names so the standardised profile is not overwritten.
df_work_km_orig = pd.concat([res_kmeans[["Cluster_kmeans"]], X], axis=1)
profils_km_orig = df_work_km_orig.groupby("Cluster_kmeans").mean().round(2)
profils_km_orig.head(6)

# Attach the k-means labels to the original data.
df_kmeans = df_finale.copy()
df_kmeans["Cluster_kmeans"] = res_kmeans["Cluster_kmeans"].values

# Columns to plot: everything except the identifiers and the label itself.
num_cols = [c for c in df_kmeans.columns if c not in ["Code zone", "Pays", "Cluster_kmeans"]]

# Grid size derived from the number of plotted columns (ceil division), so
# the figure no longer breaks when there are more than 12 of them.
n_grid_cols = 3
rows = max(1, -(-len(num_cols) // n_grid_cols))
plt.figure(figsize=(18, rows*4))

# One boxplot per variable, split by k-means cluster.
for i, col in enumerate(num_cols, 1):
    plt.subplot(rows, n_grid_cols, i)
    sns.boxplot(x="Cluster_kmeans", y=col, data=df_kmeans)
    plt.title(col, fontsize=10)
    plt.xlabel("")
    plt.ylabel("")

plt.tight_layout()
plt.savefig("boxplot_Kmeans.png",dpi=300, bbox_inches="tight")
plt.show()

from scipy.optimize import linear_sum_assignment
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

# Comparison table: the original data plus one label column per method
# (it must contain Pays, Cluster_CAH and Cluster_Kmeans).
df_compar = df_finale.assign(Cluster_CAH=clusters_cah, Cluster_Kmeans=clusters_km)
df_compar[["Pays","Cluster_CAH","Cluster_Kmeans"]].head(6)

# Contingency table (CAH in rows, k-means in columns) and its heatmap.

ct = pd.crosstab(index=df_compar["Cluster_CAH"], columns=df_compar["Cluster_Kmeans"])
# `display` is not imported in this file; presumably supplied by the notebook
# environment — confirm before running as a plain script.
display(ct)

plt.figure(figsize=(6, 4))
sns.heatmap(ct, cmap="Reds", annot=True, fmt="d")
plt.title("Chevauchement CAH vs K-means (effectifs)")
plt.xlabel("K-means")
plt.ylabel("CAH")

plt.savefig("matrice_cluster_effectif.png", dpi=300, bbox_inches="tight")
plt.show()

# Row-normalised version: each CAH cluster's row sums to 1, i.e. the share of
# that cluster falling into each k-means cluster.
row_norm = ct.div(ct.sum(axis=1),axis=0).round(3)
display(row_norm)

plt.figure(figsize=(6,4))
# The values are rounded to 3 decimals, so annotate with 3 decimals; a bare
# "f" format prints six and clutters the cells.
sns.heatmap(row_norm,annot=True,fmt=".3f",cmap="Reds")
plt.title("Chevauchement CAH vs K-means (version normalisé)")
plt.xlabel("K-means")
plt.ylabel("CAH")


plt.savefig("matrice_cluster_normalisee.png",dpi=300, bbox_inches="tight")
plt.show()

# Global agreement between the two partitions.

labels_cah = df_compar["Cluster_CAH"]
labels_km = df_compar["Cluster_Kmeans"]

ari = adjusted_rand_score(labels_cah, labels_km)            # adjusted Rand index
nmi = normalized_mutual_info_score(labels_cah, labels_km)   # normalised mutual information

print(f"Accord global : ARI = {ari:.3f} | NMI = {nmi:.3f}")

Accord global : ARI = 0.569 | NMI = 0.513

from sklearn.decomposition import PCA

# Copy of the standardised features in which two variables are sign-flipped
# and renamed with an "_inverse" suffix.
inverted_cols = [
    "Aliments avicoles (Milliers de tonnes)",
    "Surface agricole par hab (ha/hab)",
]
X_scaled_inverse = X_scaled.copy()
X_scaled_inverse[inverted_cols] = -X_scaled_inverse[inverted_cols]
X_scaled_inverse = X_scaled_inverse.rename(
    columns={name: f"{name}_inverse" for name in inverted_cols}
)
X_scaled_inverse.head()

X_mat = X_scaled_inverse.values
X_cols = list(X_scaled_inverse.columns)

# PCA keeping every component the data allows: min(n_rows, n_columns).
pca = PCA(n_components=min(X_mat.shape))
X_pca = pca.fit_transform(X_mat)
expl_var = pca.explained_variance_ratio_

# Scree plot: share of variance explained by each component.
pc_index = np.arange(1, len(expl_var) + 1)

plt.figure(figsize=(6, 4))
plt.plot(pc_index, expl_var*100, marker='o')
plt.xticks(pc_index)
plt.xlabel("Composante principale")
plt.ylabel("Variance expliquée (%)")
plt.title("Scree plot (variance expliquée par composante)")
plt.grid(True, linestyle=':')

plt.savefig("ScreePlot_acp.png", dpi=300, bbox_inches="tight")
plt.show()


# Cumulative explained variance of the first two components
# (adjust the slice if more are needed).
print("Variance expliquée cumulée (PC1..PC2):")
print(np.cumsum(expl_var)[:2])

Variance expliquée cumulée (PC1..PC2):
[0.23448811 0.40598155]

# Cumulative explained variance, in percent.
cum_var = 100 * expl_var.cumsum()
component_ids = np.arange(1, cum_var.size + 1)

plt.figure(figsize=(6, 4))
plt.plot(component_ids, cum_var, marker='o')
plt.xticks(component_ids)
plt.xlabel("Composante principale")
plt.ylabel("Variance expliquée cumulée (%)")
plt.title("Cumul de la variance expliquée")
plt.grid(True, linestyle=':')

plt.savefig("cumul_acp.png", dpi=300, bbox_inches="tight")
plt.show()

# Loadings of the variables on the axes:
#   loadings = eigenvectors * sqrt(eigenvalues)
# i.e. pca.components_.T scaled column-wise by sqrt(pca.explained_variance_).
component_scale = np.sqrt(pca.explained_variance_)
loadings = pca.components_.T * component_scale

# Keep the first 5 components, one row per variable.
loadings_df = pd.DataFrame(
    loadings[:, :5],
    index=X_cols,
    columns=[f"PC{rank}" for rank in range(1, 6)],
)

# For each component, print the 8 variables with the largest absolute loading.
abs_loadings = loadings_df.abs()
for col, weights in abs_loadings.items():
    print(f"\nTop contributions pour {col}:")
    print(weights.sort_values(ascending=False).head(8))

Top contributions pour PC1:
Quantité de produits avicoles (kg/hab/an)         0.846205
Stabilité politique                               0.731785
RNB/hab (USD)                                     0.730111
CAGR (%/an), 2000-2018                            0.519714
Importations - Quantité                           0.458340
Part avicole (%)                                  0.381415
TDI_avicole (%)                                   0.360861
Aliments avicoles (Milliers de tonnes)_inverse    0.188094
Name: PC1, dtype: float64

Top contributions pour PC2:
Pop_2017                                          0.896411
Aliments avicoles (Milliers de tonnes)_inverse    0.866825
TDI_avicole (%)                                   0.404486
Stabilité politique                               0.279572
Surface agricole par hab (ha/hab)_inverse         0.187967
Importations - Quantité                           0.173237
Part avicole (%)                                  0.120882
CAGR (%/an), 2000-2018                            0.093849
Name: PC2, dtype: float64

Top contributions pour PC3:
Part avicole (%)                                  0.742575
Surface agricole par hab (ha/hab)_inverse         0.621784
taux de croissance du PIB (%)                     0.450599
Stabilité politique                               0.304041
RNB/hab (USD)                                     0.288062
Quantité de produits avicoles (kg/hab/an)         0.224284
Aliments avicoles (Milliers de tonnes)_inverse    0.208188
CAGR (%/an), 2000-2018                            0.137712
Name: PC3, dtype: float64

Top contributions pour PC4:
Importations - Quantité                       0.622865
TDI_avicole (%)                               0.590447
CAGR (%/an), 2000-2018                        0.546481
taux de croissance du PIB (%)                 0.280769
Stabilité politique                           0.193930
Quantité de produits avicoles (kg/hab/an)     0.155931
Pop_2017                                      0.108523
Part avicole (%)                              0.101056
Name: PC4, dtype: float64

Top contributions pour PC5:
taux de croissance du PIB (%)                 0.827429
Part avicole (%)                              0.257308
TDI_avicole (%)                               0.243489
RNB/hab (USD)                                 0.230117
Importations - Quantité                       0.151054
Surface agricole par hab (ha/hab)_inverse     0.113377
CAGR (%/an), 2000-2018                        0.102768
Quantité de produits avicoles (kg/hab/an)     0.071682
Name: PC5, dtype: float64

# PC1/PC2 projections of all countries, coloured by cluster.

proj = pd.DataFrame(dict(
    PC1=X_pca[:, 0],
    PC2=X_pca[:, 1],
    Pays=ids["Pays"].values,
    Cluster_CAH=clusters_cah,
    Cluster_Kmeans=clusters_km,
))

# The same scatter plot is drawn once per clustering method; only the label
# column, the legend prefix, the title suffix and the output file change.
for cluster_col, legend_tag, method_label, png_name in (
    ("Cluster_CAH", "CAH", "CAH", "ACP_PC1_PC2_CAH.png"),
    ("Cluster_Kmeans", "Kmeans", "K-means", "ACP_PC1_PC2_KM.png"),
):
    plt.figure(figsize=(6, 5))
    # One scatter call per cluster so each gets its own colour and legend entry.
    for c in sorted(proj[cluster_col].unique()):
        sub = proj.loc[proj[cluster_col] == c]
        plt.scatter(sub["PC1"], sub["PC2"], label=f"{legend_tag} {c}", alpha=0.7, s=25)
    plt.axhline(0, color='grey', linewidth=0.5)
    plt.axvline(0, color='grey', linewidth=0.5)
    plt.xlabel(f"PC1 ({expl_var[0]*100:.1f}%)")
    plt.ylabel(f"PC2 ({expl_var[1]*100:.1f}%)")
    plt.title(f"Projection ACP des pays — colorée par {method_label}")
    plt.legend(ncol=2, fontsize=8, frameon=False)
    plt.grid(True, linestyle=':')
    plt.savefig(png_name, dpi=300, bbox_inches="tight")
    plt.show()

# Correlation circle for PC1/PC2.

fig, ax = plt.subplots(figsize=(6, 6))

# Unit circle and the two axes through the origin.
theta = np.linspace(0, 2*np.pi, 200)
ax.plot(np.cos(theta), np.sin(theta), linestyle='--', linewidth=0.7)
ax.axhline(0, color='grey', linewidth=0.5)
ax.axvline(0, color='grey', linewidth=0.5)

# One arrow per variable, ending at its (PC1, PC2) loadings; the label is
# pushed 10% further out so it does not sit on the arrow head.
for i, var in enumerate(X_cols):
    x, y = loadings[i, :2]
    ax.arrow(0, 0, x, y, head_width=0.03, length_includes_head=True, alpha=0.8)
    ax.text(x*1.1, y*1.1, var, fontsize=9)

ax.set(
    xlim=(-1.1, 1.1),
    ylim=(-1.1, 1.1),
    xlabel=f"PC1 ({expl_var[0]*100:.1f}%)",
    ylabel=f"PC2 ({expl_var[1]*100:.1f}%)",
    title="Cercle des corrélations (PC1/PC2)",
)
ax.set_aspect('equal', 'box')
ax.grid(True, linestyle=':')
fig.savefig("cercle_corr_PC1_PC2.png", dpi=300, bbox_inches="tight")
plt.show()



# Helper table for interpreting the axes: loadings on PC1 and PC2.
loadings_df = pd.DataFrame(loadings[:, :2], index=X_cols, columns=["PC1", "PC2"])

# For each axis, the 8 variables that weigh most (by absolute loading).
for position, axis_name in enumerate(loadings_df.columns):
    header = f"Top contributions | {axis_name} (absolu décroissant):"
    print(header if position == 0 else "\n" + header)
    order = loadings_df[axis_name].abs().sort_values(ascending=False).index
    print(loadings_df.loc[order].head(8))

Top contributions | PC1 (absolu décroissant):
                                                     PC1       PC2
Quantité de produits avicoles (kg/hab/an)       0.846205  0.062817
Stabilité politique                             0.731785 -0.279572
RNB/hab (USD)                                   0.730111 -0.078901
CAGR (%/an), 2000-2018                         -0.519714 -0.093849
Importations - Quantité                         0.458340  0.173237
Part avicole (%)                                0.381415  0.120882
TDI_avicole (%)                                 0.360861 -0.404486
Aliments avicoles (Milliers de tonnes)_inverse -0.188094 -0.866825

Top contributions | PC2 (absolu décroissant):
                                                     PC1       PC2
Pop_2017                                       -0.001353  0.896411
Aliments avicoles (Milliers de tonnes)_inverse -0.188094 -0.866825
TDI_avicole (%)                                 0.360861 -0.404486
Stabilité politique                             0.731785 -0.279572
Surface agricole par hab (ha/hab)_inverse       0.118686  0.187967
Importations - Quantité                         0.458340  0.173237
Part avicole (%)                                0.381415  0.120882
CAGR (%/an), 2000-2018                         -0.519714 -0.093849

df_compar  # reminder of the comparison table

# Cluster ids to exclude, per method.
ban_cah = {1, 2, 4}
ban_km = {1, 2, 4}

# A country is excluded as soon as either method puts it in a banned cluster;
# the candidates are all remaining index labels.
banned_by_cah = df_compar["Cluster_CAH"].isin(ban_cah)
banned_by_km = df_compar["Cluster_Kmeans"].isin(ban_km)
mask_ban = banned_by_cah | banned_by_km
ids_bannis = df_compar.index[mask_ban]
ids_candidats = df_compar.index.difference(ids_bannis)

len(ids_candidats)

92

# Number of excluded index labels (countries in a banned cluster for at least one method).
len(ids_bannis)

73

# Rows of the comparison table restricted to the candidate countries.
df_compar_candidats = df_compar.loc[ids_candidats]
df_compar_candidats

# Project ONLY the candidates, using the PCA that was already fitted:
# transform() is called here, not fit().
X_sub = X_scaled_inverse.loc[ids_candidats].to_numpy()   # standardised / sign-flipped features, filtered
X_pca_sub = pca.transform(X_sub)

# PC3/PC4 projections coloured by cluster.

proj = pd.DataFrame(dict(
    PC3=X_pca_sub[:, 2],
    PC4=X_pca_sub[:, 3],
    Pays=df_compar_candidats["Pays"].values,
    Cluster_CAH=df_compar_candidats["Cluster_CAH"].values,
    Cluster_Kmeans=df_compar_candidats["Cluster_Kmeans"].values,
))

# The same scatter plot is drawn once per clustering method; only the label
# column, the legend prefix, the title suffix and the output file change.
for cluster_col, legend_tag, method_label, png_name in (
    ("Cluster_CAH", "CAH", "CAH", "ACP_PC3_PC4_CAH.png"),
    ("Cluster_Kmeans", "Kmeans", "K-means", "ACP_PC3_PC4_KM.png"),
):
    plt.figure(figsize=(6, 5))
    # One scatter call per cluster so each gets its own colour and legend entry.
    for c in sorted(proj[cluster_col].unique()):
        sub = proj.loc[proj[cluster_col] == c]
        plt.scatter(sub["PC3"], sub["PC4"], label=f"{legend_tag} {c}", alpha=0.7, s=25)
    plt.axhline(0, color='grey', linewidth=0.5)
    plt.axvline(0, color='grey', linewidth=0.5)
    plt.axis("equal")
    plt.xlabel(f"PC3 ({expl_var[2]*100:.1f}%)")
    plt.ylabel(f"PC4 ({expl_var[3]*100:.1f}%)")
    plt.title(f"Projection ACP des pays — colorée par {method_label}")
    plt.legend(ncol=2, fontsize=8, frameon=False)
    plt.grid(True, linestyle=':')
    plt.savefig(png_name, dpi=300, bbox_inches="tight")
    plt.show()

# Correlation circle for PC3/PC4.
# Loadings = eigenvectors scaled column-wise by sqrt(eigenvalues).
component_scale = np.sqrt(pca.explained_variance_)
loadings = pca.components_.T * component_scale


fig, ax = plt.subplots(figsize=(6, 6))

# Unit circle and the two axes through the origin.
theta = np.linspace(0, 2*np.pi, 200)
ax.plot(np.cos(theta), np.sin(theta), linestyle='--', linewidth=0.7)
ax.axhline(0, color='grey', linewidth=0.5)
ax.axvline(0, color='grey', linewidth=0.5)

# One arrow per variable, ending at its (PC3, PC4) loadings; the label is
# pushed 10% further out so it does not sit on the arrow head.
for i, var in enumerate(X_cols):
    x, y = loadings[i, 2:4]
    ax.arrow(0, 0, x, y, head_width=0.03, length_includes_head=True, alpha=0.8)
    ax.text(x*1.1, y*1.1, var, fontsize=9)

ax.set(
    xlim=(-1.1, 1.1),
    ylim=(-1.1, 1.1),
    xlabel=f"PC3 ({expl_var[2]*100:.1f}%)",
    ylabel=f"PC4 ({expl_var[3]*100:.1f}%)",
    title="Cercle des corrélations (PC3/PC4)",
)
ax.set_aspect('equal', 'box')
ax.grid(True, linestyle=':')
fig.savefig("cercle_corr_PC3_PC4.png", dpi=300, bbox_inches="tight")
plt.show()

# Helper table for interpreting the axes: loadings on PC3 and PC4.
loadings_df = pd.DataFrame(loadings[:, [2, 3]], index=X_cols, columns=["PC3", "PC4"])

# For each axis, the 8 variables that weigh most (by absolute loading).
for position, axis_name in enumerate(loadings_df.columns):
    header = f"Top contributions | {axis_name} (absolu décroissant):"
    print(header if position == 0 else "\n" + header)
    order = loadings_df[axis_name].abs().sort_values(ascending=False).index
    print(loadings_df.loc[order].head(8))

Top contributions | PC3 (absolu décroissant):
                                                     PC3       PC4
Part avicole (%)                                0.742575 -0.101056
Surface agricole par hab (ha/hab)_inverse       0.621784 -0.067042
taux de croissance du PIB (%)                  -0.450599 -0.280769
Stabilité politique                            -0.304041 -0.193930
RNB/hab (USD)                                  -0.288062  0.092067
Quantité de produits avicoles (kg/hab/an)       0.224284 -0.155931
Aliments avicoles (Milliers de tonnes)_inverse  0.208188 -0.099236
CAGR (%/an), 2000-2018                          0.137712  0.546481

Top contributions | PC4 (absolu décroissant):
                                                 PC3       PC4
Importations - Quantité                    -0.071822  0.622865
TDI_avicole (%)                            -0.007633  0.590447
CAGR (%/an), 2000-2018                      0.137712  0.546481
taux de croissance du PIB (%)              -0.450599 -0.280769
Stabilité politique                        -0.304041 -0.193930
Quantité de produits avicoles (kg/hab/an)   0.224284 -0.155931
Pop_2017                                   -0.116197  0.108523
Part avicole (%)                            0.742575 -0.101056

# Project only the candidate countries with the already-fitted PCA.
# The projection is computed once and reused for both figures; the two
# copy-pasted cells used to recompute the identical transform.
Xpca_candidats = pca.transform(X_scaled_inverse.loc[ids_candidats].values)

# One labelled scatter plot per pair of components: (PC1, PC2) then (PC3, PC4).
for ix, iy, png_name in (
    (0, 1, "ACP_projPays_PC1PC2.png"),
    (2, 3, "ACP_projPays_PC3PC4.png"),
):
    name_x, name_y = f"PC{ix + 1}", f"PC{iy + 1}"

    # Coordinates of the candidates on the two selected components, indexed
    # like df_compar_candidats so the country names align on the index.
    proj_pays = pd.DataFrame(
        Xpca_candidats[:, [ix, iy]],
        index=ids_candidats,
        columns=[name_x, name_y],
    )
    proj_pays["Pays"] = df_compar_candidats["Pays"]

    plt.figure(figsize=(10, 8))
    plt.scatter(proj_pays[name_x], proj_pays[name_y], alpha=0.7)

    # Write each country's name next to its point (column-wise iteration
    # instead of building a Series per row with iterrows).
    for x_coord, y_coord, country in zip(
        proj_pays[name_x], proj_pays[name_y], proj_pays["Pays"]
    ):
        plt.text(x_coord, y_coord, country, fontsize=8, alpha=0.7)

    plt.axhline(0, color="grey", linestyle="--", linewidth=0.5)
    plt.axvline(0, color="grey", linestyle="--", linewidth=0.5)
    plt.xlabel(f"{name_x} ({expl_var[ix]*100:.1f}%)")
    plt.ylabel(f"{name_y} ({expl_var[iy]*100:.1f}%)")
    plt.title(f"Projection ACP des pays candidats ({name_x}–{name_y})")

    plt.savefig(png_name, dpi=300, bbox_inches="tight")
    plt.show()

# Starting point: the candidate countries only.
df_base = df_compar_candidats.copy()


# Columns used for each scoring category.
cols_import = ["TDI_avicole (%)", "Importations - Quantité"]
cols_stab   = ["Stabilité politique", "RNB/hab (USD)"]
cols_marche = ["Pop_2017", "Quantité de produits avicoles (kg/hab/an) "]


# Z-score normalisation within the candidate set, one category at a time.
# Each category gets its own throw-away StandardScaler, so the module-level
# `scaler` fitted on the full feature matrix is no longer overwritten.
Z_import = pd.DataFrame(StandardScaler().fit_transform(df_base[cols_import]),
                        index=df_base.index, columns=cols_import)
Z_stab   = pd.DataFrame(StandardScaler().fit_transform(df_base[cols_stab]),
                        index=df_base.index, columns=cols_stab)
Z_marche = pd.DataFrame(StandardScaler().fit_transform(df_base[cols_marche]),
                        index=df_base.index, columns=cols_marche)

# Category score = plain mean of the z-scores of its columns.
df_scores = pd.DataFrame(index=df_base.index)
df_scores["Score_Import"] = Z_import.mean(axis=1)
df_scores["Score_Stab"]   = Z_stab.mean(axis=1)
df_scores["Score_Marche"] = Z_marche.mean(axis=1)

# Attach the country names (joined on the index).
df_scores = df_scores.join(df_base[["Pays"]])



# Top 10 countries per category.
top_k = 10
top_import = df_scores.sort_values("Score_Import", ascending=False).head(top_k)
top_stab   = df_scores.sort_values("Score_Stab",   ascending=False).head(top_k)
top_marche = df_scores.sort_values("Score_Marche", ascending=False).head(top_k)

print("— Top importateurs —")
display(top_import[["Pays","Score_Import"]])

print("— Top stables (politique + RNB) —")
display(top_stab[["Pays","Score_Stab"]])

print("— Top marchés (population + conso avicole) —")
display(top_marche[["Pays","Score_Marche"]])

— Top importateurs —

— Top stables (politique + RNB) —

— Top marchés (population + conso avicole) —

# Cross the three top lists.

# Stack the three tops, tagging each row with the list it came from, then
# count how many lists each country appears in.
tops_union = pd.concat([
    top.assign(List=tag)
    for top, tag in (
        (top_import, "Import"),
        (top_stab, "Stab"),
        (top_marche, "Marche"),
    )
])

freq = tops_union.groupby(["Pays"]).size().reset_index(name="Frequence")

# Global score: weighted sum of the three category scores.
w_import, w_stab, w_marche = 0.5, 0.25, 0.25  # weights
global_score = (df_scores["Score_Import"]*w_import
                + df_scores["Score_Stab"]*w_stab
                + df_scores["Score_Marche"]*w_marche)

df_global = df_scores.assign(Score_Global=global_score)
df_global = df_global.sort_values("Score_Global", ascending=False)

# Shortlist = countries appearing in at least one top list, sorted by
# 1) frequency (descending), then 2) global score (descending).
shortlist = pd.merge(df_global.reset_index(), freq, on="Pays", how="inner")
shortlist = shortlist.sort_values(by=["Frequence", "Score_Global"], ascending=[False, False])

shortlist_cols = ["Pays", "Frequence", "Score_Import", "Score_Stab", "Score_Marche", "Score_Global"]
print("— Shortlist (union des 3 tops, triée par fréquence puis score global) —")
display(shortlist[shortlist_cols].head(30))




# Suggested final ranking: the N best global scores.
N = 10
ranking_cols = ["Pays", "Score_Import", "Score_Stab", "Score_Marche", "Score_Global"]
print(f"— Top {N} Global —")
display(df_global.reset_index()[ranking_cols].head(N))

— Shortlist (union des 3 tops, triée par fréquence puis score global) —

— Top 10 Global —

# 1) Store the global score as a column of the scores table
df_scores["Score_Global"] = global_score

# 2) Keep the 15 rows with the highest global score
top15 = df_scores.sort_values(by="Score_Global", ascending=False).iloc[:15]

# 3) Look the same rows up in df_finale (by index label) to recover all the
#    raw indicators, and attach the global score next to them
top15_full = df_finale.loc[top15.index].assign(Score_Global=top15["Score_Global"])

# Only the first 7 rows are displayed (the display was cut down from 10 to 7)
top15_full.head(7)

	Code zone	Pays	Importations - Quantité	TDI_avicole (%)	taux de croissance du PIB (%)	RNB/hab (USD)	Quantité de produits avicoles (kg/hab/an)	Part avicole (%)	CAGR (%/an), 2000-2018	Aliments avicoles (Milliers de tonnes)	Pop_2017	Surface agricole par hab (ha/hab)	Stabilité politique
0	1	Arménie	36.0	42.86	9.305043	4094.332429	27.70	48.35	-0.217251	138.0	2944791.0	0.5692	-0.63
1	2	Afghanistan	83.0	63.36	4.444345	539.263388	3.03	30.03	3.283677	188.0	36296113.0	1.0445	-2.79
2	3	Albanie	38.0	37.62	9.767331	4503.239709	31.02	49.90	-0.454795	398.0	2884169.0	0.4071	0.37
3	4	Algérie	2.0	0.30	6.287893	4027.655361	14.81	55.93	1.724390	3973.0	41389189.0	0.9987	-0.92
4	7	Angola	300.0	86.46	20.766649	3788.156996	11.23	46.50	3.566730	789.0	29816766.0	1.5349	-0.39

	Importations - Quantité	TDI_avicole (%)	taux de croissance du PIB (%)	RNB/hab (USD)	Quantité de produits avicoles (kg/hab/an)	Part avicole (%)	CAGR (%/an), 2000-2018	Aliments avicoles (Milliers de tonnes)	Pop_2017	Surface agricole par hab (ha/hab)	Stabilité politique
0	-0.308024	0.219158	0.252456	-0.529438	0.016604	-0.012396	-1.317314	-0.218877	-0.261912	-0.206334	-0.635762
1	-0.096950	0.754790	-0.522120	-0.730202	-1.307058	-0.969047	1.548387	-0.216672	-0.047697	-0.074372	-3.085598
2	-0.299042	0.082246	0.326124	-0.506346	0.194737	0.068543	-1.511757	-0.207412	-0.262301	-0.251339	0.498422
3	-0.460716	-0.892864	-0.228342	-0.533203	-0.675006	0.383423	0.272026	-0.049768	-0.014984	-0.087088	-0.964675
4	0.877584	1.358355	2.078921	-0.546728	-0.867090	-0.109001	1.780081	-0.190170	-0.089313	0.061783	-0.363557

	Code zone	Pays	Importations - Quantité	TDI_avicole (%)	taux de croissance du PIB (%)	RNB/hab (USD)	Quantité de produits avicoles (kg/hab/an)	Part avicole (%)	CAGR (%/an), 2000-2018	Aliments avicoles (Milliers de tonnes)	Pop_2017	Surface agricole par hab (ha/hab)	Stabilité politique
0	1	Arménie	-0.308024	0.219158	0.252456	-0.529438	0.016604	-0.012396	-1.317314	-0.218877	-0.261912	-0.206334	-0.635762
1	2	Afghanistan	-0.096950	0.754790	-0.522120	-0.730202	-1.307058	-0.969047	1.548387	-0.216672	-0.047697	-0.074372	-3.085598
2	3	Albanie	-0.299042	0.082246	0.326124	-0.506346	0.194737	0.068543	-1.511757	-0.207412	-0.262301	-0.251339	0.498422
3	4	Algérie	-0.460716	-0.892864	-0.228342	-0.533203	-0.675006	0.383423	0.272026	-0.049768	-0.014984	-0.087088	-0.964675
4	7	Angola	0.877584	1.358355	2.078921	-0.546728	-0.867090	-0.109001	1.780081	-0.190170	-0.089313	0.061783	-0.363557

	Importations - Quantité	TDI_avicole (%)	taux de croissance du PIB (%)	RNB/hab (USD)	Quantité de produits avicoles (kg/hab/an)	Part avicole (%)	CAGR (%/an), 2000-2018	Aliments avicoles (Milliers de tonnes)	Pop_2017	Surface agricole par hab (ha/hab)	Stabilité politique
Cluster_CAH
1	-0.33	-0.60	-0.09	-0.61	-0.76	-0.27	0.53	-0.14	-0.02	-0.13	-0.84
2	0.45	-0.89	0.28	0.56	0.53	0.45	-0.38	5.71	6.32	-0.20	-0.20
3	3.67	1.31	-0.35	1.14	0.74	0.25	-0.29	0.12	0.08	-0.15	0.13
4	-0.26	0.39	0.51	-0.01	-0.44	-1.07	0.31	-0.16	-0.22	4.29	0.59
5	-0.23	1.18	-0.30	-0.13	0.33	0.60	0.57	-0.22	-0.26	-0.26	0.39
6	-0.11	-0.22	0.24	0.49	0.50	-0.00	-0.82	-0.04	-0.16	-0.14	0.58

	Importations - Quantité	TDI_avicole (%)	taux de croissance du PIB (%)	RNB/hab (USD)	Quantité de produits avicoles (kg/hab/an)	Part avicole (%)	CAGR (%/an), 2000-2018	Aliments avicoles (Milliers de tonnes)	Pop_2017	Surface agricole par hab (ha/hab)	Stabilité politique
Cluster_CAH
1	30.80	11.60	7.17	2730.90	13.31	43.48	2.04	1905.59	4.116146e+07	0.85	-0.81
2	204.33	0.50	9.48	23442.59	37.31	57.12	0.93	134609.33	1.028261e+09	0.61	-0.25
3	922.22	84.42	5.53	33732.56	41.16	53.38	1.04	7877.78	5.645192e+07	0.78	0.05
4	46.00	49.47	10.91	13260.63	19.11	28.09	1.77	1480.17	9.111451e+06	16.78	0.45
5	52.52	79.48	5.82	11104.90	33.62	60.10	2.08	184.84	3.467403e+06	0.36	0.27
6	81.11	25.99	9.25	22168.45	36.75	48.57	0.39	4210.49	1.808068e+07	0.80	0.44

Produisez une étude de marché avec Python

Notebook 2 : Clustering et les différentes visualisations associées

1) Importation de df_finale et normalisation

2) Clustering 1 : CAH (avec un dendrogramme comme visualisation)

3) Clustering 2 : k-means

4) Comparaison des résultats entre les 2 méthodes de clustering

5) ACP

	k	inertie	silhouette
0	2	1512.554098	0.192076
1	3	1299.699721	0.197382
2	4	1160.438616	0.177554
3	5	1053.864566	0.215808
4	6	944.060500	0.187163
5	7	875.564919	0.185229
6	8	814.408497	0.168108
7	9	740.652893	0.190504

	Importations - Quantité	TDI_avicole (%)	taux de croissance du PIB (%)	RNB/hab (USD)	Quantité de produits avicoles (kg/hab/an)	Part avicole (%)	CAGR (%/an), 2000-2018	Aliments avicoles (Milliers de tonnes)	Pop_2017	Surface agricole par hab (ha/hab)	Stabilité politique
Cluster_kmeans
0	0.40	1.27	0.09	-0.13	-0.02	0.65	1.00	-0.21	-0.22	-0.04	0.12
1	-0.36	-0.47	-0.10	-0.61	-0.83	-0.49	0.49	-0.15	-0.03	0.03	-0.79
2	0.55	-0.89	0.69	-0.46	-0.38	0.61	-0.34	5.54	8.58	-0.29	-0.49
3	0.14	-0.05	0.04	0.56	0.71	0.18	-0.74	0.04	-0.12	-0.13	0.60
4	-0.39	1.14	-0.80	-0.58	-1.10	-2.15	0.14	-0.22	-0.26	9.75	0.96

	Importations - Quantité	TDI_avicole (%)	taux de croissance du PIB (%)	RNB/hab (USD)	Quantité de produits avicoles (kg/hab/an)	Part avicole (%)	CAGR (%/an), 2000-2018	Aliments avicoles (Milliers de tonnes)	Pop_2017	Surface agricole par hab (ha/hab)	Stabilité politique
Cluster_kmeans
0	193.96	83.09	8.28	11175.59	26.97	60.94	2.62	423.08	9.031644e+06	1.17	0.03
1	24.85	16.56	7.10	2632.50	11.97	39.27	2.00	1811.82	3.843446e+07	1.42	-0.76
2	226.00	0.42	12.08	5307.07	20.34	60.18	0.97	130759.50	1.379849e+09	0.25	-0.50
3	136.37	32.51	7.98	23433.59	40.54	52.12	0.49	6115.37	2.495995e+07	0.83	0.46
4	18.00	78.26	2.68	3180.08	6.84	7.40	1.56	74.00	3.113786e+06	36.44	0.78

Cluster_Kmeans	0	1	2	3	4
Cluster_CAH
1	0.051	0.898	0.000	0.051	0.000
2	0.000	0.000	0.667	0.333	0.000
3	0.444	0.000	0.000	0.556	0.000
4	0.167	0.500	0.000	0.167	0.167
5	0.581	0.097	0.000	0.323	0.000
6	0.000	0.035	0.000	0.965	0.000

	Pays	Score_Import
60	Chine - RAS de Hong-Kong	4.106069
102	Pays-Bas	2.309053
49	Allemagne	2.100781
163	Belgique	1.688368
67	Iraq	1.617850
74	Japon	1.349249
150	Émirats arabes unis	1.213155
95	Mexique	1.184136
152	Royaume-Uni de Grande-Bretagne et d'Irlande du...	1.066260
129	Arabie saoudite	1.003932

	Pays	Score_Stab
140	Suisse	2.186940
164	Luxembourg	2.181117
110	Norvège	2.063446
63	Islande	2.035925
106	Nouvelle-Zélande	1.401231
68	Irlande	1.376526
34	Danemark	1.322035
139	Suède	1.289857
43	Finlande	1.160790
8	Autriche	1.152431

	Pays	Score_Marche
14	Brésil	3.405720
124	Fédération de Russie	2.200769
95	Mexique	2.049959
74	Japon	1.648299
69	Israël	1.152367
6	Argentine	1.045904
127	Saint-Vincent-et-les Grenadines	1.013420
60	Chine - RAS de Hong-Kong	0.966124
152	Royaume-Uni de Grande-Bretagne et d'Irlande du...	0.912401
89	Malaisie	0.835252

	Pays	Frequence	Score_Import	Score_Stab	Score_Marche	Score_Global
0	Chine - RAS de Hong-Kong	2	4.106069	1.017854	0.966124	2.549029
3	Japon	2	1.349249	1.031270	1.648299	1.344517
5	Royaume-Uni de Grande-Bretagne et d'Irlande du...	2	1.066260	0.487302	0.912401	0.883056
7	Mexique	2	1.184136	-1.185866	2.049959	0.808091
1	Allemagne	1	2.100781	0.770766	0.744984	1.429328
2	Pays-Bas	1	2.309053	1.091669	-0.103772	1.401501
4	Belgique	1	1.688368	0.636508	-0.464837	0.887102
6	Émirats arabes unis	1	1.213155	0.719020	0.317527	0.865714
8	Luxembourg	1	0.333260	2.181117	-0.364036	0.620900
9	Arabie saoudite	1	1.003932	-0.701999	0.633298	0.484791
10	Danemark	1	0.191372	1.322035	0.062524	0.441826
11	Saint-Vincent-et-les Grenadines	1	0.254144	0.028765	1.013420	0.387618
12	Irlande	1	0.090079	1.376526	-0.268956	0.321932
13	Suisse	1	-0.248107	2.186940	-0.512196	0.294632
14	Brésil	1	-0.855471	-0.927351	3.405720	0.191857
15	Autriche	1	-0.067505	1.152431	-0.275352	0.185517
16	Fédération de Russie	1	-0.237674	-1.031466	2.200769	0.173488
17	Iraq	1	1.617850	-2.424729	-0.125069	0.171475
18	Islande	1	-0.706510	2.035925	-0.142836	0.120017
19	Suède	1	-0.338013	1.289857	-0.349382	0.066112
20	Norvège	1	-0.836592	2.063446	-0.400389	-0.002532
21	Nouvelle-Zélande	1	-0.843687	1.401231	0.061501	-0.056161
22	Finlande	1	-0.719508	1.160790	-0.498254	-0.194120
23	Israël	1	-0.839444	-0.393745	1.152367	-0.230066
24	Argentine	1	-0.845111	-0.340445	1.045904	-0.246191
25	Malaisie	1	-0.707689	-0.495479	0.835252	-0.268901

	Code zone	Pays	Importations - Quantité	TDI_avicole (%)	taux de croissance du PIB (%)	RNB/hab (USD)	Quantité de produits avicoles (kg/hab/an)	Part avicole (%)	CAGR (%/an), 2000-2018	Aliments avicoles (Milliers de tonnes)	Pop_2017	Surface agricole par hab (ha/hab)	Stabilité politique	Score_Global
60	96	Chine - RAS de Hong-Kong	1074.0	248.61	6.361267	47831.226310	72.04	46.30	0.610884	8.0	7306322.0	0.0007	0.82	2.549029
49	79	Allemagne	1333.0	46.69	6.368624	45470.601480	30.69	31.00	0.116470	14063.0	82658409.0	0.2019	0.57	1.429328
102	150	Pays-Bas	880.0	130.18	6.406410	48092.791505	34.45	38.31	0.382652	7024.0	17021347.0	0.1069	0.91	1.401501
74	110	Japon	1105.0	18.71	-1.455734	40241.325870	38.14	55.30	-0.014044	12181.0	127502725.0	0.0375	1.10	1.344517
163	255	Belgique	458.0	141.36	5.616445	44598.085232	25.83	32.71	0.615207	2164.0	11419748.0	0.1162	0.42	0.887102
152	229	Royaume-Uni de Grande-Bretagne et d'Irlande du...	880.0	28.59	-0.329817	39958.102362	43.17	47.37	0.728017	7987.0	66727461.0	0.2618	0.38	0.883056
150	225	Émirats arabes unis	478.0	99.58	5.757934	42590.456631	50.95	73.31	6.435566	628.0	9487203.0	0.0405	0.60	0.865714

	Code zone	Pays	Importations - Quantité	TDI_avicole (%)	taux de croissance du PIB (%)	RNB/hab (USD)	Quantité de produits avicoles (kg/hab/an)	Part avicole (%)	CAGR (%/an), 2000-2018	Aliments avicoles (Milliers de tonnes)	Pop_2017	Surface agricole par hab (ha/hab)	Stabilité politique	Cluster_CAH	Cluster_Kmeans
0	1	Arménie	36.0	42.86	9.305043	4094.332429	27.70	48.35	-0.217251	138.0	2944791.0	0.5692	-0.63	6	3
2	3	Albanie	38.0	37.62	9.767331	4503.239709	31.02	49.90	-0.454795	398.0	2884169.0	0.4071	0.37	6	3
5	8	Antigua-et-Barbuda	7.0	100.00	2.784684	16458.928678	56.98	76.06	1.321878	0.0	95426.0	0.0943	0.73	5	0
6	9	Argentine	8.0	0.29	15.442348	14161.574384	57.74	46.24	1.032766	20457.0	43937140.0	2.6772	0.16	6	3
8	11	Autriche	141.0	44.34	5.394834	46977.963467	32.89	32.27	0.540452	1838.0	8819901.0	0.3010	1.03	6	3
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
153	230	Ukraine	127.0	6.81	20.067978	2502.811657	37.05	60.39	-0.547062	4856.0	44487709.0	0.9326	-1.87	6	3
156	234	Uruguay	3.0	3.66	13.091758	18141.986754	20.95	30.14	0.212902	499.0	3436641.0	4.1386	1.04	6	3
160	244	Samoa	17.0	100.00	1.321479	4187.343785	66.96	67.74	0.652741	0.0	195352.0	0.3174	1.16	5	3
163	255	Belgique	458.0	141.36	5.616445	44598.085232	25.83	32.71	0.615207	2164.0	11419748.0	0.1162	0.42	3	0
164	256	Luxembourg	19.0	95.00	5.617920	78845.712457	33.91	35.46	1.828184	65.0	591910.0	0.2216	1.31	6	3