본문 바로가기

Python/Machine Learning

Machine Learning : Hierarchical Clustering의 Dendrogram 설명

Cust_Spend_Data.csv 파일에 담긴 고객의 의류 소비, 음료 소비, 음식 소비 데이터를 이용해서 비슷한 고객끼리 그룹핑하자

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

# Load the customer spending table from the CSV file.
df = pd.read_csv('data/Cust_Spend_Data.csv')

# Keep every column from position 2 onward as the clustering features.
# NOTE(review): presumably the first two columns are identifiers — confirm.
X = df.iloc[:, 2:]

import scipy.cluster.hierarchy as sch

# Build a Ward-linkage hierarchy over the rows of X and draw it as a
# dendrogram so the cluster structure can be inspected visually.
sch.dendrogram(sch.linkage(X, method='ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean Distances')  # label typo fixed (was "Eculidaen")
plt.show()

 

 

out

in

X.shape  # (number of rows, number of columns) of the feature table

 

 

out

(10, 5)

in

from sklearn.cluster import AgglomerativeClustering

# NOTE(review): the original cell called `hc.fit_predict(X)` at this point,
# but no AgglomerativeClustering estimator was ever constructed (`hc` was only
# bound to an empty list afterwards), so the call could not run. Construct the
# estimator with the cluster count read off the dendrogram before predicting.

# Elbow method: record the within-cluster sum of squares (inertia) of a
# K-means fit for k = 2..7 (six values, as in the recorded output).
wcss = []
for k in range(2, 7 + 1):
    kmeans = KMeans(n_clusters=k, random_state=33)
    kmeans.fit(X)  # only the fitted inertia is needed, not the labels
    wcss.append(kmeans.inertia_)

wcss

 

 

out

[14500293.600000001,
 5387740.75,
 2187714.5,
 854349.4166666667,
 333469.3333333333,
 125160.0]

 

in

# Elbow plot: x is the cluster count k (the search starts at k = 2) and y is
# the WCSS recorded for that k. The x-range is derived from len(wcss) so the
# two sequences always have the same length.
plt.plot(range(2, 2 + len(wcss)), wcss)
plt.show()

 


out

in

# Fit the final K-means model with 4 clusters; random_state is fixed so the
# centroid initialisation is reproducible.
kmeans = KMeans(n_clusters=4, random_state=76)
# Fit on X and get the cluster label assigned to each row.
y_pred = kmeans.fit_predict(X)
y_pred

 

 

out

array([2, 0, 0, 0, 0, 3, 1, 1, 1, 1])