聚类之dbscan算法

时间:2022-05-06
本文章向大家介绍聚类之dbscan算法,主要内容包括其使用实例、应用技巧、基本知识点总结和需要注意事项,具有一定的参考价值,需要的朋友可以参考一下。

简要说明:DBSCAN 是一种基于密度的聚类算法,无需预先指定聚类个数;簇的划分由邻域半径 eps 和最小样本数 min_samples 两个参数决定,不属于任何簇的低密度样本会被标记为噪声(标签为 -1)。

Python 简单示例:

 1 # coding:utf-8
 2 from sklearn.cluster import DBSCAN
 3 import numpy as np
 4 import matplotlib.pyplot as plt
 5 from sklearn import metrics
 6 from sklearn.datasets import make_blobs
 7 from sklearn.preprocessing import StandardScaler
 8 
 9 print '==============================================================='
10 print 'produce the data'
11 print '==============================================================='
12 centers = [[1, 1], [-1, -1], [1, -1]]
13 X, Y = make_blobs(n_samples=100, centers=centers, cluster_std=0.1,
14                             random_state=0)
15 X = StandardScaler().fit_transform(X)
16 
17 print '==============================================================='
18 print 'calc by dbscan'
19 print '==============================================================='
20 db = DBSCAN(eps=0.8, min_samples=20).fit(X)
21 core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
22 core_samples_mask[db.core_sample_indices_] = True
23 labels = db.labels_
24 
25 # Number of clusters in labels, ignoring noise if present.
26 n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
27 print('聚类个数: %d' % n_clusters_)
28 clus = dict();
29 for label in range(n_clusters_):
30     print('Cluster ', label, ':')
31     print(list(X[labels == label].flatten()))
32 
33 # Black removed and is used for noise instead.
34 unique_labels = set(labels)
35 colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
36 for k, col in zip(unique_labels, colors):
37     if k == -1:
38         # Black used for noise.
39         col = 'k'
40 
41     class_member_mask = (labels == k)
42 
43     xy = X[class_member_mask & core_samples_mask]
44     plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
45              markeredgecolor='k', markersize=14)
46 
47     xy = X[class_member_mask & ~core_samples_mask]
48     plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
49              markeredgecolor='k', markersize=6)
50 
51 plt.title('the number of clusters: %d' % n_clusters_)
52 plt.show()

结果(每个列表是对应簇内样本标准化后的坐标,按 x、y 交替展平):

聚类个数: 3
('Cluster ', 0, ':')
[0.71972237193721955, 1.4247928346062555, 0.71030169555602063, 1.4167660110433198, 0.86650601488165513, 1.540511791039243, 0.72211708218565507, 1.3420815507666486, 0.52144250622046129, 1.5915649627099053, 0.71856075881136006, 1.5389120321653047, 0.64879819429817887, 1.2594860931663014, 0.79530587030761835, 1.3059845691292478, 0.90217078085348124, 1.2810513687682608, 0.83428120392822847, 1.5121992651002165, 0.94501772892108737, 1.2304572600393282, 0.61658908505616727, 1.2016210693860701, 0.43123948422351122, 1.4540043441128292, 0.80748270682101664, 1.6223313161580848, 0.80443060148710011, 1.3686384349677738, 0.66615156531279185, 1.4012699966389015, 0.6619526285382874, 1.3526309930197211, 0.8911440824978365, 1.4271253228550598, 0.73656729474920646, 1.2941145631795228, 0.52954661764367772, 1.4337665710281307, 0.63563964407982976, 1.3462216323222238, 0.70021314158580827, 1.4301131836568965, 0.59151066054028689, 1.2340997618614948, 0.60781931318621818, 1.4257196900301823, 0.63157667601940692, 1.3465597131647515, 0.6922193145921226, 1.428232599016918, 0.53128314796952969, 1.3621288955307922, 0.56975224051689699, 1.4671406711851693, 0.6086375682191727, 1.1746304350700796, 0.78429058907277294, 1.3975929004149423, 0.64892102137794172, 1.3382327193866654, 0.75050124858369904, 1.4200749599097495, 0.86238319832692667, 1.3629329516580013, 0.70809022215282358, 1.3648390986044516]
('Cluster ', 1, ':')
[0.63464928652349617, -0.97205337209660403, 0.58556292018246547, -0.73073437840723787, 0.65468651727634131, -0.73441274377141652, 0.63547729042563716, -0.66453211054416861, 0.82977905264905216, -0.7026553048598404, 0.69272708259422322, -0.80662677945376782, 0.9336700453767246, -0.59453052783739546, 0.69594552246388464, -0.55457015205979654, 0.76464102851903226, -0.75835599381130958, 0.76982282906457911, -0.90616105214655729, 0.78543611278256287, -0.64893557021283277, 0.59666018000438314, -0.90008593889031796, 0.56548438993348771, -0.70794621415677228, 0.59303515144236474, -0.66398477418914037, 0.95709744689291321, -0.63610640287638309, 0.82323862006265514, -0.85079072543505374, 0.5630287661735992, -0.7852163996685585, 0.80131670450275849, -0.70246600519558988, 0.7454029815714649, -0.85218313302445714, 0.69903056978268618, -0.86014011002564883, 0.61762634973010477, -0.80939160363205609, 0.5726669483917376, -0.64672353808362981, 0.79449562934102214, -0.80530619881071974, 0.62387498724474699, -0.82390835490887293, 0.75896134677936167, -0.75445848024152995, 0.72097157756491004, -0.66892268630644069, 0.8043594793684804, -0.72698175393472497, 0.66550366099053682, -0.88207692316921515, 0.58097294102170138, -0.78269622047011467, 0.65015889850413455, -0.53164375004590891, 0.62442808457473808, -0.57263307430187604, 0.54434830115223298, -0.68966984891579086, 0.60597037368186768, -0.61780925553487331]
('Cluster ', 2, ':')
[-1.3018531714169292, -0.75534700218006379, -1.5240879328477461, -0.73075767535431713, -1.3558101832440284, -0.69305594070134047, -1.3620120045408117, -0.63846838584413301, -1.501425254649166, -0.75213478312911264, -1.509452276188433, -0.67908018226800171, -1.405243295552026, -0.63269595355922037, -1.3845689200452296, -0.80888897029610984, -1.389466770316631, -0.66133584344003127, -1.4703826992026119, -0.60662876562678392, -1.5515574536964245, -0.64073570728242468, -1.2268148679790234, -0.87919324689086187, -1.5524068961533037, -0.53014962867934823, -1.3956593058608056, -0.59560607016988043, -1.2688843291272613, -0.53521150305805631, -1.4263127243188716, -0.54687874172399775, -1.3060482963082332, -0.86721692570472864, -1.37782320727954, -0.89918162580831196, -1.473789241693936, -0.5401560289692996, -1.2284758921484242, -0.64018233171903494, -1.4714951134839154, -0.81553230478371208, -1.5627790544062625, -0.63346398650999924, -1.4559823420025644, -0.65116763855957849, -1.222575804590778, -0.57926104071118723, -1.5191797025632472, -0.53370819315524121, -1.3873298238589391, -0.85285539988795034, -1.499269551565358, -0.73289080952354146, -1.4606217359962219, -0.73031015933933197, -1.520199373022632, -0.79765210072574655, -1.5415010971493395, -0.62444408930165995, -1.4139110740238496, -0.69363628437548275, -1.3265183485066454, -0.75270484864742238, -1.3497595932847166, -0.72258801674792672]