聚类案例之股票信息分类
附代码:
import pandas as pd
beer = pd.read_csv('E:\\1\\000518.csv',sep=',')
In [3]:
beer
Out[3]:
open | close | high | low | volume | money | |
---|---|---|---|---|---|---|
0 | 2.26 | 2.25 | 2.26 | 2.25 | 64047.0 | 144526.0 |
1 | 2.25 | 2.25 | 2.26 | 2.25 | 7020.0 | 15805.0 |
2 | 2.25 | 2.24 | 2.25 | 2.24 | 52599.0 | 118287.0 |
3 | 2.24 | 2.24 | 2.25 | 2.24 | 39098.0 | 87629.0 |
4 | 2.24 | 2.24 | 2.24 | 2.23 | 45794.0 | 102386.0 |
5 | 2.24 | 2.24 | 2.24 | 2.24 | 52923.0 | 118635.0 |
6 | 2.24 | 2.25 | 2.25 | 2.24 | 33374.0 | 74939.0 |
7 | 2.25 | 2.25 | 2.25 | 2.24 | 22465.0 | 50455.0 |
8 | 2.24 | 2.25 | 2.25 | 2.24 | 72471.0 | 162653.0 |
9 | 2.25 | 2.25 | 2.26 | 2.24 | 20845.0 | 46884.0 |
10 | 2.26 | 2.26 | 2.26 | 2.25 | 3888.0 | 8774.0 |
11 | 2.26 | 2.25 | 2.26 | 2.25 | 34886.0 | 78752.0 |
12 | 2.26 | 2.26 | 2.26 | 2.26 | 3348.0 | 7564.0 |
13 | 2.26 | 2.26 | 2.26 | 2.26 | 5184.0 | 11712.0 |
14 | 2.26 | 2.26 | 2.26 | 2.26 | 7236.0 | 16348.0 |
15 | 2.26 | 2.26 | 2.26 | 2.26 | 5400.0 | 12200.0 |
16 | 2.26 | 2.26 | 2.26 | 2.26 | 60267.0 | 136152.0 |
17 | 2.26 | 2.26 | 2.26 | 2.26 | 4320.0 | 9760.0 |
18 | 2.26 | 2.26 | 2.26 | 2.26 | 6588.0 | 14884.0 |
19 | 2.26 | 2.26 | 2.26 | 2.26 | 33482.0 | 75640.0 |
20 | 2.26 | 2.26 | 2.26 | 2.26 | 12745.0 | 28792.0 |
21 | 2.26 | 2.25 | 2.26 | 2.25 | 35642.0 | 80492.0 |
22 | 2.26 | 2.26 | 2.26 | 2.26 | 90184.0 | 203740.0 |
23 | 2.25 | 2.26 | 2.26 | 2.25 | 24841.0 | 55893.0 |
24 | 2.26 | 2.25 | 2.26 | 2.25 | 1620.0 | 3652.0 |
25 | 2.25 | 2.26 | 2.26 | 2.25 | 6804.0 | 15314.0 |
26 | 2.25 | 2.26 | 2.26 | 2.25 | 11881.0 | 26740.0 |
27 | 2.25 | 2.26 | 2.26 | 2.25 | 24409.0 | 55024.0 |
28 | 2.25 | 2.26 | 2.26 | 2.25 | 3780.0 | 8525.0 |
29 | 2.25 | 2.26 | 2.26 | 2.25 | 41366.0 | 93233.0 |
... | ... | ... | ... | ... | ... | ... |
869850 | 3.28 | 3.28 | 3.28 | 3.28 | 300.0 | 984.0 |
869851 | 3.28 | 3.28 | 3.28 | 3.28 | 0.0 | 0.0 |
869852 | 3.29 | 3.29 | 3.29 | 3.29 | 100.0 | 329.0 |
869853 | 3.29 | 3.28 | 3.29 | 3.28 | 17200.0 | 56546.0 |
869854 | 3.29 | 3.28 | 3.29 | 3.28 | 1400.0 | 4597.0 |
869855 | 3.29 | 3.29 | 3.29 | 3.29 | 200.0 | 658.0 |
869856 | 3.28 | 3.29 | 3.29 | 3.28 | 4100.0 | 13459.0 |
869857 | 3.29 | 3.28 | 3.29 | 3.28 | 1400.0 | 4604.0 |
869858 | 3.28 | 3.28 | 3.28 | 3.28 | 0.0 | 0.0 |
869859 | 3.28 | 3.28 | 3.28 | 3.28 | 15200.0 | 49856.0 |
869860 | 3.28 | 3.28 | 3.28 | 3.28 | 5600.0 | 18368.0 |
869861 | 3.28 | 3.29 | 3.29 | 3.28 | 2300.0 | 7552.0 |
869862 | 3.29 | 3.29 | 3.29 | 3.29 | 0.0 | 0.0 |
869863 | 3.28 | 3.28 | 3.28 | 3.28 | 2000.0 | 6560.0 |
869864 | 3.29 | 3.28 | 3.29 | 3.28 | 9200.0 | 30245.0 |
869865 | 3.28 | 3.28 | 3.28 | 3.28 | 500.0 | 1640.0 |
869866 | 3.28 | 3.28 | 3.28 | 3.28 | 58800.0 | 192798.0 |
869867 | 3.28 | 3.28 | 3.28 | 3.28 | 1100.0 | 3608.0 |
869868 | 3.28 | 3.27 | 3.28 | 3.27 | 38600.0 | 126372.0 |
869869 | 3.27 | 3.27 | 3.27 | 3.27 | 27700.0 | 90579.0 |
869870 | 3.28 | 3.28 | 3.28 | 3.28 | 4400.0 | 14432.0 |
869871 | 3.28 | 3.28 | 3.28 | 3.28 | 1400.0 | 4592.0 |
869872 | 3.28 | 3.28 | 3.28 | 3.27 | 17100.0 | 56027.0 |
869873 | 3.28 | 3.28 | 3.28 | 3.28 | 31300.0 | 102729.0 |
869874 | 3.28 | 3.28 | 3.28 | 3.28 | 2000.0 | 6560.0 |
869875 | 3.28 | 3.28 | 3.28 | 3.28 | 3300.0 | 10824.0 |
869876 | 3.28 | 3.28 | 3.28 | 3.28 | 1300.0 | 4264.0 |
869877 | 3.28 | 3.28 | 3.28 | 3.28 | 0.0 | 0.0 |
869878 | 3.28 | 3.28 | 3.28 | 3.28 | 0.0 | 0.0 |
869879 | 3.28 | 3.29 | 3.29 | 3.28 | 3400.0 | 11156.0 |
869880 rows × 6 columns
In [6]:
X=beer[['open','close','high','low','volume','money']]
In [7]:
#K_means clustering
from sklearn.cluster import KMeans
km = KMeans(n_clusters=3).fit(X)
km2 = KMeans(n_clusters=2).fit(X)
In [8]:
km.labels_
Out[8]:
array([0, 0, 0, ..., 0, 0, 0])
In [12]:
beer['close1'] = km.labels_
beer['close2']=km.labels_
beer.sort_values('close1')
Out[12]:
open | close | high | low | volume | money | close1 | close2 | |
---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 2.26 | 2.25 | 64047.0 | 144526.0 | 0 | 0 |
576484 | 0 | 0 | 5.96 | 5.94 | 411400.0 | 2446780.0 | 0 | 0 |
576485 | 0 | 0 | 5.97 | 5.95 | 307400.0 | 1831532.0 | 0 | 0 |
576486 | 0 | 0 | 5.99 | 5.97 | 492800.0 | 2946828.0 | 0 | 0 |
576488 | 0 | 0 | 6.02 | 6.01 | 429200.0 | 2583728.0 | 0 | 0 |
576490 | 0 | 0 | 6.01 | 5.99 | 516800.0 | 3100452.0 | 0 | 0 |
576491 | 0 | 0 | 6.00 | 5.98 | 444700.0 | 2662148.0 | 0 | 0 |
576492 | 0 | 0 | 5.99 | 5.98 | 380300.0 | 2277268.0 | 0 | 0 |
576494 | 0 | 0 | 5.98 | 5.97 | 390200.0 | 2330280.0 | 0 | 0 |
576495 | 0 | 0 | 5.98 | 5.97 | 249300.0 | 1489820.0 | 0 | 0 |
576498 | 0 | 0 | 5.95 | 5.94 | 302000.0 | 1795104.0 | 0 | 0 |
576483 | 0 | 0 | 5.96 | 5.93 | 354600.0 | 2104872.0 | 0 | 0 |
576500 | 0 | 0 | 5.93 | 5.92 | 448400.0 | 2658368.0 | 0 | 0 |
576502 | 0 | 0 | 5.97 | 5.95 | 187800.0 | 1118504.0 | 0 | 0 |
576503 | 0 | 0 | 5.97 | 5.96 | 306200.0 | 1826672.0 | 0 | 0 |
576504 | 0 | 0 | 5.97 | 5.96 | 252900.0 | 1509208.0 | 0 | 0 |
576505 | 0 | 0 | 5.98 | 5.97 | 196600.0 | 1174200.0 | 0 | 0 |
576506 | 0 | 0 | 5.98 | 5.97 | 261900.0 | 1565096.0 | 0 | 0 |
576508 | 0 | 0 | 5.98 | 5.97 | 435200.0 | 2598160.0 | 0 | 0 |
576510 | 0 | 0 | 5.97 | 5.96 | 276400.0 | 1647848.0 | 0 | 0 |
576511 | 0 | 0 | 5.97 | 5.96 | 161100.0 | 960744.0 | 0 | 0 |
576512 | 0 | 0 | 5.97 | 5.95 | 299800.0 | 1786568.0 | 0 | 0 |
576513 | 0 | 0 | 5.97 | 5.95 | 139700.0 | 832264.0 | 0 | 0 |
576501 | 0 | 0 | 5.95 | 5.93 | 449800.0 | 2672864.0 | 0 | 0 |
576514 | 0 | 0 | 5.96 | 5.95 | 397000.0 | 2362656.0 | 0 | 0 |
576481 | 0 | 0 | 5.99 | 5.97 | 424400.0 | 2536606.0 | 0 | 0 |
576477 | 0 | 0 | 6.02 | 6.02 | 16700.0 | 100448.0 | 0 | 0 |
576425 | 0 | 0 | 5.80 | 5.79 | 192800.0 | 1117824.0 | 0 | 0 |
576427 | 0 | 0 | 5.81 | 5.80 | 213700.0 | 1240896.0 | 0 | 0 |
576428 | 0 | 0 | 5.81 | 5.80 | 235700.0 | 1368416.0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
624961 | 2 | 2 | 4.86 | 4.83 | 704100.0 | 3403838.0 | 2 | 2 |
150393 | 2 | 2 | 5.90 | 5.87 | 821400.0 | 4834944.0 | 2 | 2 |
177084 | 2 | 2 | 6.91 | 6.89 | 575700.0 | 3972896.0 | 2 | 2 |
177087 | 2 | 2 | 6.90 | 6.88 | 492700.0 | 3394368.0 | 2 | 2 |
177089 | 2 | 2 | 6.90 | 6.89 | 492200.0 | 3394752.0 | 2 | 2 |
285619 | 2 | 2 | 4.43 | 4.38 | 2127800.0 | 9331944.0 | 2 | 2 |
177104 | 2 | 2 | 6.88 | 6.86 | 485600.0 | 3339008.0 | 2 | 2 |
177109 | 2 | 2 | 6.88 | 6.86 | 606800.0 | 4172352.0 | 2 | 2 |
133974 | 2 | 2 | 6.46 | 6.43 | 512000.0 | 3302224.0 | 2 | 2 |
823290 | 2 | 2 | 4.33 | 4.22 | 1876800.0 | 8073984.0 | 2 | 2 |
177112 | 2 | 2 | 6.87 | 6.86 | 475700.0 | 3267200.0 | 2 | 2 |
823292 | 2 | 2 | 4.30 | 4.18 | 918000.0 | 3885136.0 | 2 | 2 |
624982 | 2 | 2 | 4.82 | 4.79 | 846400.0 | 4066904.0 | 2 | 2 |
177113 | 2 | 2 | 6.87 | 6.86 | 1610000.0 | 11050560.0 | 2 | 2 |
335774 | 2 | 2 | 7.87 | 7.83 | 1177600.0 | 9240112.0 | 2 | 2 |
177114 | 2 | 2 | 6.87 | 6.86 | 528200.0 | 3626880.0 | 2 | 2 |
624978 | 2 | 2 | 4.83 | 4.79 | 1828100.0 | 8784036.0 | 2 | 2 |
177115 | 2 | 2 | 6.87 | 6.86 | 571900.0 | 3926976.0 | 2 | 2 |
177116 | 2 | 2 | 6.87 | 6.86 | 541800.0 | 3720832.0 | 2 | 2 |
384885 | 2 | 2 | 7.08 | 7.03 | 576100.0 | 4067248.0 | 2 | 2 |
177119 | 2 | 2 | 6.87 | 6.87 | 1158400.0 | 7958208.0 | 2 | 2 |
177120 | 2 | 2 | 6.92 | 6.89 | 1121600.0 | 7734116.0 | 2 | 2 |
177121 | 2 | 2 | 6.91 | 6.86 | 618700.0 | 4261458.0 | 2 | 2 |
285642 | 2 | 2 | 4.39 | 4.38 | 839800.0 | 3681336.0 | 2 | 2 |
624966 | 2 | 2 | 4.85 | 4.79 | 1627700.0 | 7836872.0 | 2 | 2 |
150396 | 2 | 2 | 5.89 | 5.85 | 599000.0 | 3517232.0 | 2 | 2 |
150395 | 2 | 2 | 5.89 | 5.87 | 570800.0 | 3357552.0 | 2 | 2 |
150394 | 2 | 2 | 5.90 | 5.87 | 1019900.0 | 5993008.0 | 2 | 2 |
624979 | 2 | 2 | 4.80 | 4.77 | 854100.0 | 4084476.0 | 2 | 2 |
341983 | 2 | 2 | 7.05 | 7.04 | 707300.0 | 4988704.0 | 2 | 2 |
869880 rows × 8 columns
In [16]:
from pandas.plotting import scatter_matrix
%matplotlib inline
close_centers = km.cluster_centers_
close_centers_2 = km2.cluster_centers_
In [22]:
beer.groupby("close").mean()
Out[22]:
open | high | low | volume | money | close1 | close2 | |
---|---|---|---|---|---|---|---|
close | |||||||
0 | 0 | 4.678545 | 4.669329 | 7.491448e+04 | 3.665092e+05 | 0 | 0 |
1 | 1 | 7.283650 | 7.205276 | 6.855841e+06 | 4.644045e+07 | 1 | 1 |
2 | 2 | 6.804501 | 6.773411 | 9.541442e+05 | 6.112502e+06 | 2 | 2 |
In [20]:
beer.groupby("close2").mean()
Out[20]:
open | close | high | low | volume | money | close1 | |
---|---|---|---|---|---|---|---|
close2 | |||||||
0 | 0 | 0 | 4.678545 | 4.669329 | 7.491448e+04 | 3.665092e+05 | 0 |
1 | 1 | 1 | 7.283650 | 7.205276 | 6.855841e+06 | 4.644045e+07 | 1 |
2 | 2 | 2 | 6.804501 | 6.773411 | 9.541442e+05 | 6.112502e+06 | 2 |
In [33]:
centers = beer.groupby("close").mean().reset_index()
In [25]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['font.size'] = 14
In [26]:
import numpy as np
colors = np.array(['red','green','blue','yellow'])
In [34]:
plt.scatter(beer['open'],beer['high'],c = colors[beer['close']])
plt.scatter(centers.open,centers.high,linewidths=3,marker='+',s=300,c='black')
plt.xlabel("open")
plt.ylabel("high")
Out[34]:
Text(0, 0.5, 'high')
In [35]:
scatter_matrix(beer[["close","open","high","low"]],s=100,alpha=1,c=colors[beer["close"]],figsize=(10,10))
plt.suptitle("With 3 centroids initialized")
Out[35]:
Text(0.5, 0.98, 'With 3 centroids initialized')
In [39]:
#scaled data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled
Out[39]:
array([[-1.24466895, -1.24971352, -1.24622056, -1.24811754, -0.14123794, -0.24382383], [-1.24970706, -1.24971352, -1.24622056, -1.24811754, -0.36183077, -0.32424962], [-1.24970706, -1.2547516 , -1.25125501, -1.25315928, -0.18552129, -0.26021814], ..., [-0.73078238, -0.73079179, -0.73270656, -0.72881834, -0.38898565, -0.33412469], [-0.73078238, -0.73079179, -0.73270656, -0.72881834, -0.38898565, -0.33412469], [-0.73078238, -0.72575372, -0.72767211, -0.72881834, -0.37583371, -0.32715434]])
In [40]:
km = KMeans(n_clusters=3).fit(X_scaled)
In [41]:
beer["scaled_cluster"] = km.labels_
beer.sort_values("scaled_cluster")
Out[41]:
open | close | high | low | volume | money | close1 | close2 | scaled_cluster | |
---|---|---|---|---|---|---|---|---|---|
667794 | 0 | 0 | 6.79 | 6.79 | 9300.0 | 63147.0 | 0 | 0 | 0 |
644428 | 0 | 0 | 6.96 | 6.87 | 71100.0 | 492974.0 | 0 | 0 | 0 |
644429 | 0 | 0 | 6.94 | 6.89 | 144600.0 | 998647.0 | 0 | 0 | 0 |
644430 | 0 | 0 | 6.89 | 6.86 | 152000.0 | 1045036.0 | 0 | 0 | 0 |
644431 | 0 | 0 | 6.87 | 6.85 | 115000.0 | 788371.0 | 0 | 0 | 0 |
644432 | 0 | 0 | 6.86 | 6.84 | 188200.0 | 1287814.0 | 0 | 0 | 0 |
644433 | 0 | 0 | 6.87 | 6.84 | 136400.0 | 934928.0 | 0 | 0 | 0 |
644434 | 0 | 0 | 6.88 | 6.86 | 294500.0 | 2025171.0 | 0 | 0 | 0 |
644435 | 0 | 0 | 6.90 | 6.89 | 82400.0 | 568002.0 | 0 | 0 | 0 |
644436 | 0 | 0 | 6.90 | 6.88 | 133900.0 | 923330.0 | 0 | 0 | 0 |
644437 | 0 | 0 | 6.90 | 6.88 | 90400.0 | 622793.0 | 0 | 0 | 0 |
644438 | 0 | 0 | 6.89 | 6.87 | 150600.0 | 1036431.0 | 0 | 0 | 0 |
644439 | 0 | 0 | 6.90 | 6.89 | 65700.0 | 452861.0 | 0 | 0 | 0 |
644440 | 0 | 0 | 6.90 | 6.88 | 48600.0 | 335217.0 | 0 | 0 | 0 |
644441 | 0 | 0 | 6.93 | 6.89 | 99100.0 | 684674.0 | 0 | 0 | 0 |
644442 | 0 | 0 | 6.95 | 6.92 | 74300.0 | 515826.0 | 0 | 0 | 0 |
644443 | 0 | 0 | 6.95 | 6.93 | 70300.0 | 487831.0 | 0 | 0 | 0 |
644444 | 0 | 0 | 6.94 | 6.92 | 162100.0 | 1123415.0 | 0 | 0 | 0 |
644445 | 0 | 0 | 6.94 | 6.91 | 217100.0 | 1503263.0 | 0 | 0 | 0 |
644446 | 0 | 0 | 6.96 | 6.92 | 250700.0 | 1740438.0 | 0 | 0 | 0 |
644447 | 2 | 2 | 7.05 | 6.94 | 539600.0 | 3774772.0 | 2 | 2 | 0 |
644448 | 0 | 0 | 7.10 | 7.00 | 427200.0 | 3024468.0 | 0 | 0 | 0 |
644427 | 0 | 0 | 6.88 | 6.84 | 343700.0 | 2356016.0 | 0 | 0 | 0 |
644449 | 0 | 0 | 7.07 | 7.01 | 162300.0 | 1140414.0 | 0 | 0 | 0 |
644426 | 0 | 0 | 6.86 | 6.84 | 236600.0 | 1621721.0 | 0 | 0 | 0 |
644424 | 0 | 0 | 6.90 | 6.85 | 412200.0 | 2833977.0 | 0 | 0 | 0 |
644403 | 0 | 0 | 6.96 | 6.93 | 341900.0 | 2375505.0 | 0 | 0 | 0 |
644404 | 0 | 0 | 6.98 | 6.94 | 301900.0 | 2099317.0 | 0 | 0 | 0 |
644405 | 0 | 0 | 7.01 | 6.97 | 429800.0 | 3006154.0 | 0 | 0 | 0 |
644406 | 0 | 0 | 7.00 | 6.96 | 166800.0 | 1164742.0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
618442 | 2 | 2 | 7.06 | 7.01 | 1116900.0 | 7846912.0 | 2 | 2 | 2 |
618441 | 2 | 2 | 7.02 | 7.00 | 1908000.0 | 13381184.0 | 2 | 2 | 2 |
618440 | 2 | 2 | 7.04 | 7.01 | 1622400.0 | 11398784.0 | 2 | 2 | 2 |
618439 | 2 | 2 | 7.07 | 7.03 | 1334500.0 | 9399104.0 | 2 | 2 | 2 |
618438 | 2 | 2 | 7.10 | 7.05 | 1093700.0 | 7742592.0 | 2 | 2 | 2 |
618437 | 2 | 2 | 7.13 | 7.10 | 1618000.0 | 11503744.0 | 2 | 2 | 2 |
631278 | 2 | 2 | 6.29 | 6.26 | 991200.0 | 6210959.0 | 2 | 2 | 2 |
569910 | 2 | 2 | 6.85 | 6.84 | 948300.0 | 6491008.0 | 2 | 2 | 2 |
569907 | 2 | 2 | 6.80 | 6.76 | 1801200.0 | 12228352.0 | 2 | 2 | 2 |
700290 | 2 | 2 | 9.08 | 9.05 | 947400.0 | 8585212.0 | 2 | 2 | 2 |
631284 | 2 | 2 | 6.24 | 6.20 | 1253900.0 | 7797035.0 | 2 | 2 | 2 |
247465 | 2 | 2 | 4.11 | 4.10 | 1247300.0 | 5114904.0 | 2 | 2 | 2 |
627605 | 2 | 2 | 5.92 | 5.92 | 1081100.0 | 6400640.0 | 2 | 2 | 2 |
591024 | 2 | 2 | 5.89 | 5.87 | 1119900.0 | 6586016.0 | 2 | 2 | 2 |
591023 | 2 | 2 | 5.89 | 5.88 | 1129000.0 | 6646656.0 | 2 | 2 | 2 |
591022 | 2 | 2 | 5.90 | 5.88 | 2324200.0 | 13693904.0 | 2 | 2 | 2 |
591021 | 2 | 2 | 5.88 | 5.85 | 2427700.0 | 14238800.0 | 2 | 2 | 2 |
591020 | 2 | 2 | 5.85 | 5.79 | 2601700.0 | 15139256.0 | 2 | 2 | 2 |
627604 | 2 | 2 | 5.92 | 5.92 | 2050400.0 | 12138376.0 | 2 | 2 | 2 |
627603 | 2 | 2 | 5.92 | 5.92 | 2218300.0 | 13132376.0 | 2 | 2 | 2 |
627602 | 1 | 1 | 5.92 | 5.85 | 8823500.0 | 52179664.0 | 1 | 1 | 2 |
627601 | 2 | 2 | 5.86 | 5.72 | 2469300.0 | 14324688.0 | 2 | 2 | 2 |
627600 | 1 | 1 | 5.80 | 5.67 | 8339200.0 | 47560426.0 | 1 | 1 | 2 |
658208 | 2 | 2 | 6.92 | 6.88 | 1306900.0 | 9012986.0 | 2 | 2 | 2 |
247449 | 2 | 2 | 4.10 | 4.09 | 1195200.0 | 4894624.0 | 2 | 2 | 2 |
247445 | 2 | 2 | 4.18 | 4.15 | 2356100.0 | 9817096.0 | 2 | 2 | 2 |
247444 | 2 | 2 | 4.18 | 4.16 | 5117300.0 | 21366048.0 | 2 | 2 | 2 |
618398 | 2 | 2 | 7.12 | 7.09 | 1734300.0 | 12318048.0 | 2 | 2 | 2 |
247443 | 2 | 2 | 4.16 | 4.12 | 2340600.0 | 9700918.0 | 2 | 2 | 2 |
361882 | 2 | 2 | 8.44 | 8.38 | 2697200.0 | 22710144.0 | 2 | 2 | 2 |
869880 rows × 9 columns
beer.groupby("scaled_cluster").mean()
In [42]:
beer.groupby("scaled_cluster").mean()
Out[42]:
open | close | high | low | volume | money | close1 | close2 | |
---|---|---|---|---|---|---|---|---|
scaled_cluster | ||||||||
0 | 0.078958 | 0.078958 | 6.779438 | 6.767458 | 1.059397e+05 | 7.168576e+05 | 0.078958 | 0.078958 |
1 | 0.006126 | 0.006126 | 3.311344 | 3.303476 | 7.111992e+04 | 2.462302e+05 | 0.006126 | 0.006126 |
2 | 1.957469 | 1.957469 | 6.565434 | 6.524719 | 1.791183e+06 | 1.130658e+07 | 1.957469 | 1.957469 |
In [47]:
pd.plotting.scatter_matrix(X,c=colors[beer.scaled_cluster],alpha=1,figsize=(10,10),s=100)
Out[47]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000001DBC8A90>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AB75390>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AB9B940>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001ABCBEB8>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AC054A8>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AC329B0>], [<matplotlib.axes._subplots.AxesSubplot object at 0x000000001AC61F60>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AC9C588>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AC9C5C0>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AD020F0>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AD356A0>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AD63C50>], [<matplotlib.axes._subplots.AxesSubplot object at 0x000000001ADA1240>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001ADD27F0>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AE05DA0>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AE44390>, <matplotlib.axes._subplots.AxesSubplot object at 0x000000001AE73940>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000022126EF0>], [<matplotlib.axes._subplots.AxesSubplot object at 0x00000000221634E0>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000022193A90>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000221D3080>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000022203630>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000022235BE0>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000222731D0>], [<matplotlib.axes._subplots.AxesSubplot object at 0x00000000222A4780>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000222D5D30>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000022311320>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000223428D0>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000022374E80>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000223B2470>], [<matplotlib.axes._subplots.AxesSubplot object at 0x00000000223E3A20>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000022416FD0>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000224525C0>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000000022482B70>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000224C3160>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000000224F2710>]], dtype=object)
In [ ]:
from sklearn import metrics
score_scaled = metrics.silhouette_score(X,beer.scaled_cluster)
score = metrics.silhouette_score(X,beer.cluster)
print(score_scaled,score)
In [ ]:
scores = []
for k in range(2,20):
labels = KMeans(n_clusters=k).fit(X).labels_
score = metrics.silhouette_score(X,labels)
scores.append(score)
scores
In [ ]:
plt.plot(list(range(2,20)),scores)
plt.xlabel("Number of Clusters Initialized")
plt.ylabel("Sihouette Score")
In [ ]:
from sklearn.cluster import DBSCAN
db = DBSCAN(eps=10,min_s)