- 特徴量の作成
- カテゴリカルデータの前処理
などを行ったので、再度次元削減を行い、結果を確認してみます。
PCAでデータを2次元に圧縮して可視化
# Standardize the features, then project them onto the first two principal
# components and show the result as a DataFrame.
# NOTE(review): this cell scales every column of fnData, whereas the 3-D PCA
# cell and both t-SNE cells drop the last column (fnData.iloc[:, :-1]).
# Confirm which input is intended before comparing the plots.
normalizeData = StandardScaler().fit_transform(fnData.values)
pca2 = PCA(n_components=2)
dim2PcaData = pd.DataFrame(pca2.fit_transform(normalizeData))
dim2PcaData
0 | 1 | |
---|---|---|
0 | -2.617890 | 1.230049 |
1 | -0.883489 | 1.215867 |
2 | -0.986500 | -0.201632 |
3 | -0.295452 | -0.463923 |
4 | -0.358544 | -0.585817 |
… | … | … |
995 | -1.110524 | -0.645389 |
996 | 0.584318 | 0.394180 |
997 | -4.441661 | -0.175817 |
998 | -1.463922 | 0.208419 |
999 | 2.055026 | 0.118375 |
# Scatter the 2-D PCA embedding, one series per value of the target column.
# Iterate over the values actually present instead of assuming they are
# exactly 0..num-1.
classLabels = sorted(data['default.payment.next.month'].unique())
num = len(classLabels)
fig = plt.figure()
for i in classLabels:
    # dim2PcaData carries a default 0..n-1 index, so this boolean mask assumes
    # `data` uses the same row labels — TODO confirm.
    target = dim2PcaData[data['default.payment.next.month'] == i]
    plt.scatter(x=target.iloc[:, 0], y=target.iloc[:, 1], label=str(i), alpha=0.5)
# The label= passed to scatter is only displayed once a legend is drawn.
plt.legend()
t-SNEでデータを2次元に圧縮して可視化
# Embed the features (every column except the last) into two dimensions with
# t-SNE and show the result as a DataFrame.
# NOTE(review): unlike the PCA cells, the input is not standardized and no
# random_state is set, so the embedding may differ between runs — confirm
# whether that is intended.
tsne2Embedding = TSNE(n_components=2).fit_transform(fnData.iloc[:, :-1])
dim2TsneData = pd.DataFrame(tsne2Embedding)
dim2TsneData
0 | 1 | |
---|---|---|
0 | 30.335127 | -39.175529 |
1 | -9.754200 | -14.677676 |
2 | 2.735364 | -14.194139 |
3 | 17.963379 | -5.905255 |
4 | 15.242915 | -26.766068 |
… | … | … |
995 | -20.718533 | -0.357178 |
996 | -6.142908 | 0.424000 |
997 | -24.534489 | 26.326624 |
998 | 5.997464 | -32.502254 |
999 | 5.874343 | 14.879034 |
# Scatter the 2-D t-SNE embedding, one series per value of the target column.
# Iterate over the values actually present instead of assuming they are
# exactly 0..num-1.
classLabels = sorted(data['default.payment.next.month'].unique())
num = len(classLabels)
fig = plt.figure()
for i in classLabels:
    # dim2TsneData carries a default 0..n-1 index, so this boolean mask assumes
    # `data` uses the same row labels — TODO confirm.
    target = dim2TsneData[data['default.payment.next.month'] == i]
    plt.scatter(x=target.iloc[:, 0], y=target.iloc[:, 1], label=str(i), alpha=0.5)
# The label= passed to scatter is only displayed once a legend is drawn.
plt.legend()
PCAでデータを3次元に圧縮して可視化
# Standardize the features (every column except the last), then project them
# onto the first three principal components and show the result as a DataFrame.
normalizeData = StandardScaler().fit_transform(fnData.iloc[:, :-1].values)
pca3 = PCA(n_components=3)
dim3PcaData = pd.DataFrame(pca3.fit_transform(normalizeData))
dim3PcaData
0 | 1 | 2 | |
---|---|---|---|
0 | -2.860972 | 0.440091 | 1.398824 |
1 | -1.145599 | 0.763652 | 1.888266 |
2 | -0.877400 | -0.210833 | -0.358869 |
3 | -0.146323 | -0.327251 | -0.425060 |
4 | -0.185465 | -0.456240 | -0.463068 |
… | … | … | … |
995 | -0.910446 | -0.634648 | -0.800143 |
996 | 0.464402 | 0.297923 | 1.341869 |
997 | -4.330302 | -1.001982 | -0.917260 |
998 | -1.444907 | 0.012196 | 0.109259 |
999 | 2.002282 | 0.451274 | 1.098129 |
# 3-D scatter of the PCA embedding, one series per value of the target column.
# Iterate over the values actually present instead of assuming they are
# exactly 0..num-1.
classLabels = sorted(data['default.payment.next.month'].unique())
num = len(classLabels)
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments (deprecated in Matplotlib
# 3.4, removed in 3.6); add_subplot creates the 3-D axes on old and new
# versions alike and matches the t-SNE 3-D cell.
ax = fig.add_subplot(111, projection='3d')
for i in classLabels:
    # dim3PcaData carries a default 0..n-1 index, so this boolean mask assumes
    # `data` uses the same row labels — TODO confirm.
    target = dim3PcaData[data['default.payment.next.month'] == i]
    ax.scatter(
        xs=target.iloc[:, 0], ys=target.iloc[:, 1], zs=target.iloc[:, 2],
        label=str(i), alpha=0.5)
# The label= passed to scatter is only displayed once a legend is drawn.
ax.legend()
t-SNEでデータを3次元に圧縮して可視化
# Embed the features (every column except the last) into three dimensions with
# t-SNE and wrap the result in a DataFrame.
# NOTE(review): unlike the PCA cells, the input is not standardized and no
# random_state is set, so the embedding may differ between runs — confirm
# whether that is intended.
tsne3Embedding = TSNE(n_components=3).fit_transform(fnData.iloc[:, :-1])
dim3TsneData = pd.DataFrame(tsne3Embedding)
# 3-D scatter of the t-SNE embedding, one series per value of the target column.
# Iterate over the values actually present instead of assuming they are
# exactly 0..num-1.
classLabels = sorted(data['default.payment.next.month'].unique())
num = len(classLabels)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
for i in classLabels:
    # dim3TsneData carries a default 0..n-1 index, so this boolean mask assumes
    # `data` uses the same row labels — TODO confirm.
    target = dim3TsneData[data['default.payment.next.month'] == i]
    ax.scatter(
        xs=target.iloc[:, 0], ys=target.iloc[:, 1], zs=target.iloc[:, 2],
        label=str(i), alpha=0.5)
# The label= passed to scatter is only displayed once a legend is drawn.
ax.legend()
# Plotly imports for the interactive figures used further down the notebook.
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, plot, iplot
# Set up Plotly's offline rendering inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from the CDN rather
# than embedding it in the notebook — confirm against the Plotly docs.
init_notebook_mode(connected=True)
コメント