Cust_Spend_Data.csv 파일에 담긴 고객의 의류 소비, 음료 소비, 음식 소비 데이터를 이용해서 비슷한 고객끼리 그룹핑하자
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the customer spending table and keep every column from position 2
# onward as the clustering features (presumably the per-category spend
# columns, with the leading columns being identifiers -- TODO confirm).
df = pd.read_csv('data/Cust_Spend_Data.csv')
X = df.iloc[:, 2:]

import scipy.cluster.hierarchy as sch

# Build the hierarchical merge tree with Ward linkage and draw it as a
# dendrogram so a sensible number of clusters can be read off the plot.
linkage_matrix = sch.linkage(X, method='ward')
sch.dendrogram(linkage_matrix)
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean Distances')
plt.show()
out
in
# Show the dimensions of the feature table X as a (rows, columns) tuple.
X.shape
out
(10, 5)
in
from sklearn.cluster import AgglomerativeClustering, KMeans

# Assign each customer to a cluster with agglomerative (bottom-up) clustering.
# NOTE(review): the line that constructed `hc` was missing from this file, so
# the model is rebuilt here. n_clusters=3 is a placeholder -- confirm the value
# against the dendrogram. Ward linkage is used to match the dendrogram step.
hc = AgglomerativeClustering(n_clusters=3, linkage='ward')
y_pred = hc.fit_predict(X)

# Elbow method: fit K-Means for k = 2..10 and record the within-cluster sum of
# squares (inertia) for each k. The fixed random_state keeps runs reproducible.
wcss = []
for k in range(2, 10 + 1):
    kmeans = KMeans(n_clusters=k, random_state=33)
    # Only the fitted inertia is needed, so the predicted labels are not kept.
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

# Display the collected WCSS values (notebook-style trailing expression).
wcss