1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
"""Tiny k-means demo on a 2-feature dataset (density, sugar content).

Reference: the "Watermelon Book", p. 205.

The script runs a fixed number of k-means rounds and prints, after every
round, which samples belong to each cluster and the current mean vectors.
"""
import random as ra
import math


def euclidean_distance(a, b):
    """Return the Euclidean distance between two equal-length points."""
    return math.sqrt(sum(pow(x - y, 2) for x, y in zip(a, b)))


def assign_samples(samples, centers):
    """Assign every sample to its nearest centre.

    Returns one list per centre holding the 0-based indices of the samples
    assigned to it.  When a sample is equally close to several centres it
    goes to the one with the highest index (for two centres this matches
    the original ``dist[0] < dist[1]`` rule).
    """
    members = [[] for _ in centers]
    for j, sample in enumerate(samples):
        dist = [euclidean_distance(sample, center) for center in centers]
        nearest = 0
        for i in range(1, len(dist)):
            # `<=` so that ties resolve to the later centre.
            if dist[i] <= dist[nearest]:
                nearest = i
        members[nearest].append(j)
    return members


def update_centers(clusters, old_centers):
    """Return the new mean vector of every cluster, rounded to 3 decimals.

    Only the samples actually assigned to a cluster contribute to its mean;
    the previous centre is NOT averaged in.  A cluster that received no
    samples keeps its previous centre instead of dividing by zero.
    """
    new_centers = []
    for cluster, old in zip(clusters, old_centers):
        if not cluster:
            new_centers.append(list(old))
            continue
        size = len(cluster)
        new_centers.append([round(sum(axis) / size, 3) for axis in zip(*cluster)])
    return new_centers


def report(round_no, members, centers):
    """Print the partition (1-based sample numbers) and the mean vectors."""
    print('第%d轮 当前迭代的当前簇划分' % round_no)
    print('第一类')
    for j in members[0]:
        print('编号', j + 1, end=' ')
    print('\n第二类')
    for j in members[1]:
        print('编号', j + 1, end=' ')
    print('\n均值向量为')
    print('第一类 ', centers[0], '第二类 ', centers[1], end='\n==============\n')


# k = int(input())
# All samples; each entry is [density, sugar content].
D = [[0.697, 0.460], [0.774, 0.376], [0.634, 0.264], [0.608, 0.318], [0.556, 0.215],
     [0.403, 0.237], [0.481, 0.149], [0.437, 0.211], [0.666, 0.091], [0.243, 0.267]]
# Number of clusters k = 2: two samples are picked as the initial centres.
Prim = [[0.634, 0.264], [0.403, 0.237]]
length_D = len(D)
length_P = len(Prim)
count = 0
# Number of iterations to run.
Total = 4
while count < Total:
    count += 1
    # Partition the samples around the current centres.
    members = assign_samples(D, Prim)
    C = [[D[j] for j in group] for group in members]
    # After all samples are assigned, recompute the centre of every cluster.
    Prim = update_centers(C, Prim)
    # Clustering result of this round.
    report(count, members, Prim)
|