DP-means 与 k-means 聚类算法的比较
使用 Python 编写的 DP-means 与 k-means 聚类算法对比实现,压缩包内附带数据集。
代码片段和文件信息
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2014-12-01 17:25 Python-DP-Means-Clustering-master
文件 295 2014-12-01 17:25 Python-DP-Means-Clustering-master\.gitignore
文件 1748 2014-12-01 17:25 Python-DP-Means-Clustering-master\DPopt.py
文件 1317 2014-12-01 17:25 Python-DP-Means-Clustering-master\LICENSE
文件 6058 2014-12-01 17:25 Python-DP-Means-Clustering-master\README
文件 6124 2014-12-01 17:25 Python-DP-Means-Clustering-master\cluster.py
文件 354 2014-12-01 17:25 Python-DP-Means-Clustering-master\costTest.bash
文件 1232 2014-12-01 17:25 Python-DP-Means-Clustering-master\createTestData.py
目录 0 2014-12-01 17:25 Python-DP-Means-Clustering-master\img
文件 18195 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\2d-sample-data.png
文件 96576 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\3d-sample-data.png
文件 20568 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\error.png
文件 241453 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\iters.png
文件 18088 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\opt_error.png
文件 174533 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\opt_iters.png
文件 20973 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_errors.png
文件 22520 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_errors_20.png
文件 30375 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_errors_20_annotated.png
文件 24465 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error.png
文件 26844 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_10.png
文件 27245 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_12.png
文件 27581 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_15.png
文件 26937 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_18.png
文件 24941 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_2.png
文件 26406 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_20.png
文件 27115 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_3.png
文件 26713 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_4.png
文件 27006 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_5.png
文件 26386 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_8.png
文件 21904 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times.png
目录 0 2014-12-01 17:25 Python-DP-Means-Clustering-master\input
............此处省略14个文件信息
#!/usr/bin/env python
from pprint import pprint
import sys
import random
import math
import timer
class kmeans(object):
def __init__(self, _X, _k, _xVal=0, _stop=False):
    """Store the data set and seed the centroids with k random training rows.

    Parameters:
        _X: the samples; a sequence of records, each a sequence of
            feature values. The feature count is taken from the first
            record.
        _k: number of clusters.
        _xVal: number of records held out for cross-validation. The
            training set is the first ``len(_X) - _xVal`` records, so
            the input must be randomized for the hold-out to be
            meaningful.
        _stop: stored as ``self.stop``; per the original note, setting
            it to True makes iteration stop when the out-of-sample
            (cross-validation) error starts to rise.

    Raises:
        IndexError: if ``_X`` is empty (its first record is inspected).
        ValueError: from ``random.sample`` if ``_k`` exceeds the number
            of training records.
    """
    self.nFeatures = len(_X[0])
    self.xValSize = _xVal
    self.allSize = len(_X)
    self.size = self.allSize - self.xValSize  # training records only
    self.X = _X
    self.k = _k
    self.stop = _stop

    # Initialize group membership: index of the cluster for each record,
    # -1 meaning "not assigned yet".
    self.dataClusterId = [-1] * self.allSize

    # Initialize the centroids to k distinct random data points.
    # Only training indices are sampled: don't use a cross-validation
    # record as a start center.
    self.clusters = {}
    for idx, i in enumerate(random.sample(range(0, self.size), self.k)):
        self.clusters[idx] = self.X[i]

    # output records
    self.record = []
    self.errorRecord = []
def dSquared(self, x, y):
    """Return the squared Euclidean distance between points x and y.

    x and y are sequences of numbers. They are paired with ``zip``, so
    if the lengths differ the extra trailing coordinates of the longer
    one are ignored. The result is always a float (0.0 for empty input).
    """
    dist2 = 0.0
    for j, k in zip(x, y):
        dist2 += (j - k) ** 2
    return dist2
def error(self):
    """Return ``(training_error, cross_validation_error)``.

    Each value is the mean squared distance between the records of that
    subset and the centroid of the cluster they are currently assigned
    to. Training records are indices ``[0, size)``; held-out records are
    indices ``[size, allSize)``.

    Every entry of ``self.dataClusterId`` that is visited must be a key
    of ``self.clusters``, otherwise the lookup raises KeyError. With no
    training records the final division raises ZeroDivisionError.
    """
    # error on the training data
    res = 0.0
    for i in range(0, self.size):
        res += self.dSquared(self.X[i], self.clusters[self.dataClusterId[i]])

    # error on non training data
    res1 = 0.0
    for i in range(self.size, self.allSize):
        res1 += self.dSquared(self.X[i], self.clusters[self.dataClusterId[i]])

    # The guard also covers the "no hold-out" case (xValSize == 0), where
    # the loop above is empty and dividing would fail.
    err1 = 0.0
    if res1 > 0.0:
        err1 = res1 / self.xValSize

    return res / self.size, err1
def nearestCluster(self, x):
    """Return ``(cluster_id, distance)`` for the centroid closest to x.

    The distance is the Euclidean distance (square root of dSquared).
    On ties the centroid met first while iterating ``self.clusters``
    wins, because only a strictly smaller distance replaces the current
    best.

    If ``self.clusters`` is empty, or no centroid is closer than the
    sentinel, the result is ``(-sentinel, sentinel)``, where the
    sentinel is the platform's largest native integer.
    """
    # sys.maxint only exists on Python 2; fall back to sys.maxsize so
    # the same code also runs on Python 3.
    sentinel = getattr(sys, "maxint", sys.maxsize)
    cmin = sentinel
    cidx = -sentinel
    for j in self.clusters:
        dist = math.sqrt(self.dSquared(x, self.clusters[j]))
        if dist < cmin:  # record closest centroid
            cmin = dist
            cidx = j
    return cidx, cmin
def assign(self):
    """Assign every record to its nearest centroid.

    Covers all ``allSize`` records, i.e. the training records and the
    held-out cross-validation records alike, and stores the resulting
    cluster id in ``self.dataClusterId``. Returns None.
    """
    for i in range(0, self.allSize):
        # nearestCluster also returns the distance; only the id is kept.
        self.dataClusterId[i], _ = self.nearestCluster(self.X[i])
def updateClusters(self):
    """Move each centroid to the mean of the training records assigned to it.

    Only the first ``size`` records (the training set) contribute; the
    cross-validation records are ignored. A cluster with no members
    keeps its previous centroid. Returns None.

    Each training record's entry in ``self.dataClusterId`` must be in
    ``range(k)``, otherwise the accumulator lookup raises KeyError.
    """
    # Per-cluster running sums (one slot per feature) and member counts.
    sums = {}
    counts = {}
    for c in range(0, self.k):
        sums[c] = [0.0] * self.nFeatures
        counts[c] = 0

    # only calculate clusters on training not cross-validation set
    for i in range(0, self.size):
        cid = self.dataClusterId[i]
        row = self.X[i]
        total = sums[cid]
        for j in range(0, self.nFeatures):
            total[j] += row[j]
        counts[cid] += 1

    for c in self.clusters:
        members = counts[c]
        if members != 0:
            # The sums are floats, so this is a true division.
            self.clusters[c] = [s / members for s in sums[c]]
        # else: no members in this cluster, leave its centroid where it is
def run(self nmax = 100 eps = 1e-7):
prev = 0.0
prevXVal = float(sys.maxint)
for iter in range(0nmax):
# update assignments
self.assign()
# calculate error
err errXVal = self.error()
#
if self.stop and errXVal - prevXVal >= 0.0:
sys.stderr.write(“Cross-validation error increasing at step %d
“%iter)
break
prevXVal = errXVal
#
if abs(err-prev) < eps:
sys.stderr.write(“Tolerance reached a
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2014-12-01 17:25 Python-DP-Means-Clustering-master
文件 295 2014-12-01 17:25 Python-DP-Means-Clustering-master\.gitignore
文件 1748 2014-12-01 17:25 Python-DP-Means-Clustering-master\DPopt.py
文件 1317 2014-12-01 17:25 Python-DP-Means-Clustering-master\LICENSE
文件 6058 2014-12-01 17:25 Python-DP-Means-Clustering-master\README
文件 6124 2014-12-01 17:25 Python-DP-Means-Clustering-master\cluster.py
文件 354 2014-12-01 17:25 Python-DP-Means-Clustering-master\costTest.bash
文件 1232 2014-12-01 17:25 Python-DP-Means-Clustering-master\createTestData.py
目录 0 2014-12-01 17:25 Python-DP-Means-Clustering-master\img
文件 18195 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\2d-sample-data.png
文件 96576 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\3d-sample-data.png
文件 20568 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\error.png
文件 241453 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\iters.png
文件 18088 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\opt_error.png
文件 174533 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\opt_iters.png
文件 20973 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_errors.png
文件 22520 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_errors_20.png
文件 30375 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_errors_20_annotated.png
文件 24465 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error.png
文件 26844 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_10.png
文件 27245 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_12.png
文件 27581 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_15.png
文件 26937 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_18.png
文件 24941 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_2.png
文件 26406 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_20.png
文件 27115 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_3.png
文件 26713 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_4.png
文件 27006 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_5.png
文件 26386 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times-error_8.png
文件 21904 2014-12-01 17:25 Python-DP-Means-Clustering-master\img\test_times.png
目录 0 2014-12-01 17:25 Python-DP-Means-Clustering-master\input
............此处省略14个文件信息
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,请发送邮件举报,一经查实,本站将立刻删除。
评论列表(条)