{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " x y\n", "0 -15.782689 -0.254265\n", "1 -3.977557 4.819669\n", "2 -18.330663 -28.667342\n", "3 -40.254848 -17.583849\n", "4 -35.352412 -16.411859\n", "5 -0.785538 14.015280\n", "6 2.393177 38.056014\n", "7 -8.767911 13.764614\n", "8 -25.912887 -2.752717\n", "9 -18.022458 12.628714\n", "10 2.706564 34.568426\n", "11 -25.282147 -7.125060\n", "12 2.786527 19.962042\n", "13 18.079987 -9.952786\n", "14 33.559819 -4.432935\n", "15 28.712898 -24.022064\n", "16 26.195718 -13.391081\n", "17 42.174709 -13.849395\n", "18 35.859710 0.628594\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import pandas as pd #penggunaan library pandas untuk....\n", "\n", "import numpy as np\n", "\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "df = pd.DataFrame({\n", " 'x': [12, 20, 28, 18, 29, 33, 24, 45, 45, 52, 51, 52, 55, 53, 55, 61, 64, 69, 72],\n", " 'y': [39, 36, 30, 52, 54, 46, 55, 59, 63, 70, 66, 63, 58, 23, 14, 8, 19, 7, 24],\n", " 'z': [40, 50, 12, 7, 8, 55, 78, 45, 20, 35, 67, 15, 55, 45, 60, 40, 45, 55, 60]\n", "})\n", "\n", "\n", "from sklearn.decomposition import PCA\n", "pca = PCA(n_components=2)\n", "principalComponents = pca.fit_transform(df)\n", "principalDf = pd.DataFrame(data = principalComponents\n", " , columns = ['x', 'y'])\n", "\n", "print(principalDf)\n", "\n", "\n", "np.random.seed(200)\n", "k = 3\n", "# centroids[i] = [x, y]\n", "centroids = {\n", " i+1: [np.random.randint(-50,50), np.random.randint(-50, 50)]\n", " for i in range(k)\n", "}\n", " \n", "fig = plt.figure(figsize=(10, 10))\n", "plt.scatter(principalDf['x'], principalDf['y'], color='k')\n", "colmap = {1: 'r', 2: 'g', 3: 'b'}\n", "for i in centroids.keys():\n", " plt.scatter(*centroids[i], color=colmap[i])\n", "plt.xlim(-50, 50)\n", "plt.ylim(-50, 50)\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " x y distance_from_1 distance_from_2 distance_from_3 \\\n", "0 -15.782689 -0.254265 34.731813 34.659291 33.484423 \n", "1 -3.977557 4.819669 43.679113 25.443210 23.004411 \n", "2 -18.330663 -28.667342 7.783227 41.797800 59.437683 \n", "3 -40.254848 -17.583849 23.102166 59.037932 62.829557 \n", "4 -35.352412 -16.411859 20.933704 54.011473 58.541293 \n", "5 -0.785538 14.015280 53.332714 28.940785 13.308117 \n", "6 2.393177 38.056014 76.737663 48.628483 12.334626 \n", "7 -8.767911 13.764614 50.134568 34.499557 18.419013 \n", "8 -25.912887 -2.752717 31.305780 
44.225283 42.217595 \n", "9 -18.022458 12.628714 47.010297 41.510978 26.623764 \n", "10 2.706564 34.568426 73.585797 45.232290 8.870049 \n", "11 -25.282147 -7.125060 26.905507 43.290989 44.880709 \n", "12 2.786527 19.962042 60.244668 31.832775 6.430894 \n", "13 18.079987 -9.952786 48.466419 1.954423 38.258187 \n", "14 33.559819 -4.432935 64.709691 15.963456 41.735199 \n", "15 28.712898 -24.022064 53.648940 19.273628 55.358002 \n", "16 26.195718 -13.391081 54.261751 9.809870 44.731597 \n", "17 42.174709 -13.849395 69.174699 24.872314 54.497094 \n", "18 35.859710 0.628594 69.154352 19.834865 39.950344 \n", "\n", " closest color \n", "0 3 b \n", "1 3 b \n", "2 1 r \n", "3 1 r \n", "4 1 r \n", "5 3 b \n", "6 3 b \n", "7 3 b \n", "8 1 r \n", "9 3 b \n", "10 3 b \n", "11 1 r \n", "12 3 b \n", "13 2 g \n", "14 2 g \n", "15 2 g \n", "16 2 g \n", "17 2 g \n", "18 2 g \n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
## Assignment Stage

def assignment(df, centroids):
    """Assign every point in ``df`` to its nearest centroid.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame expected to carry numeric ``'x'`` and ``'y'`` columns. It is
        modified in place: one ``distance_from_<key>`` column per centroid,
        plus ``'closest'`` and ``'color'``, are added or overwritten.
    centroids : dict
        Maps a centroid key to a sequence whose first two items are the
        centroid's x and y coordinates.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Notes
    -----
    Colours are looked up in the notebook-level ``colmap`` dict, so it must
    contain an entry for every centroid key (a missing key raises KeyError).
    """
    # Remember which centroid each distance column belongs to, so the winning
    # column can be mapped straight back to its key. Parsing the column name
    # with str.lstrip would be wrong: lstrip removes a *set of characters*,
    # not a prefix, and only happens to work for purely numeric keys.
    column_to_key = {}
    for key in centroids.keys():
        column = 'distance_from_{}'.format(key)
        # Euclidean distance: sqrt((x1 - x2)^2 + (y1 - y2)^2)
        df[column] = np.sqrt(
            (df['x'] - centroids[key][0]) ** 2
            + (df['y'] - centroids[key][1]) ** 2
        )
        column_to_key[column] = key

    # idxmin(axis=1) yields, per row, the name of the smallest distance column.
    nearest_column = df.loc[:, list(column_to_key)].idxmin(axis=1)
    df['closest'] = nearest_column.map(column_to_key)
    df['color'] = df['closest'].map(lambda key: colmap[key])
    return df

# assignment() returns the frame it was given, so from here on `df` and
# `principalDf` are two names for the same object.
df = assignment(principalDf, centroids)
print(df)

# Points coloured by their nearest centroid, centroids drawn on top.
fig = plt.figure(figsize=(10, 10))
plt.scatter(df['x'], df['y'], color=df['color'], alpha=0.5, edgecolor='k')
for i in centroids.keys():
    plt.scatter(*centroids[i], color=colmap[i])
plt.xlim(-50, 50)
plt.ylim(-50, 50)
plt.show()
## Update Stage

import copy

# Snapshot of the centroid positions before they move; used for the arrows below.
old_centroids = copy.deepcopy(centroids)

def update(k):
    """Move each centroid to the mean of the points currently assigned to it.

    Parameters
    ----------
    k : dict
        Maps a centroid key to a mutable ``[x, y]`` pair. The pairs are
        updated in place.

    Returns
    -------
    dict
        The same ``k`` object that was passed in.

    Notes
    -----
    Reads the notebook-level ``df``, which must already carry the ``'x'``,
    ``'y'`` and ``'closest'`` columns. A centroid with no assigned points is
    left where it is rather than being overwritten with NaN.
    """
    # Work on the dict that was passed in, not on the global `centroids`.
    for i in k.keys():
        members = df[df['closest'] == i]
        if members.empty:
            # The mean of an empty selection is NaN, which would make this
            # centroid unusable in every later distance computation.
            continue
        k[i][0] = np.mean(members['x'])
        k[i][1] = np.mean(members['y'])
    return k

centroids = update(centroids)

fig = plt.figure(figsize=(10, 10))
ax = plt.axes()
plt.scatter(principalDf['x'], principalDf['y'], color=df['color'], alpha=0.5, edgecolor='k')
for i in centroids.keys():
    plt.scatter(*centroids[i], color=colmap[i])
plt.xlim(-50, 50)
plt.ylim(-50, 50)
# Draw an arrow from each old centroid towards its new position; the arrow
# shaft covers a quarter of the actual displacement.
for i in old_centroids.keys():
    old_x = old_centroids[i][0]
    old_y = old_centroids[i][1]
    dx = (centroids[i][0] - old_centroids[i][0]) * 0.25
    dy = (centroids[i][1] - old_centroids[i][1]) * 0.25
    ax.arrow(old_x, old_y, dx, dy, head_width=2, head_length=3, fc=colmap[i], ec=colmap[i])
plt.show()
# Repeat the assignment stage against the current centroid positions.
df = assignment(principalDf, centroids)

# Plot results: points in the colour of their nearest centroid, with the
# centroids themselves drawn on top.
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(df['x'], df['y'], color=df['color'], alpha=0.5, edgecolor='k')
for label, position in centroids.items():
    ax.scatter(*position, color=colmap[label])
ax.set_xlim(-50, 50)
ax.set_ylim(-50, 50)
plt.show()
# %% Iterate the hand-written k-means to convergence
# Continue until all assigned categories don't change any more
while True:
    # Keep the previous labels so a full pass with no change can be detected.
    closest_centroids = df['closest'].copy(deep=True)
    centroids = update(centroids)
    df = assignment(df, centroids)
    if closest_centroids.equals(df['closest']):
        break

fig = plt.figure(figsize=(10, 10))
plt.scatter(df['x'], df['y'], color=df['color'], alpha=0.5, edgecolor='k')
for i in centroids.keys():
    plt.scatter(*centroids[i], color=colmap[i])
plt.xlim(-50, 50)
plt.ylim(-50, 50)
plt.show()

# %% Cluster the same data with scikit-learn
# NOTE: this rebinds `df` to the raw three-column frame and rebuilds
# `principalDf` from scratch, so neither carries the 'closest' / 'color'
# columns produced by the hand-written algorithm any more.
df = pd.DataFrame({
    'x': [12, 20, 28, 18, 29, 33, 24, 45, 45, 52, 51, 52, 55, 53, 55, 61, 64, 69, 72],
    'y': [39, 36, 30, 52, 54, 46, 55, 59, 63, 70, 66, 63, 58, 23, 14, 8, 19, 7, 24],
    'z': [40, 50, 12, 7, 8, 55, 78, 45, 20, 35, 67, 15, 55, 45, 60, 40, 45, 55, 60]
})


from sklearn.decomposition import PCA
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(df)
principalDf = pd.DataFrame(data = principalComponents
             , columns = ['x', 'y'])

print(principalDf)


from sklearn.cluster import KMeans

# Fix random_state so the fitted centres, and in particular the numbering of
# the clusters (which decides their plot colours), are identical on every run.
# 200 is the same seed value the notebook passes to np.random.seed.
kmeans = KMeans(n_clusters=3, random_state=200)
kmeans.fit(principalDf)

# %% Extract the fitted labels and cluster centres
labels = kmeans.predict(principalDf)
# NOTE: `centroids` is rebound from the {label: [x, y]} dict used by the
# hand-written algorithm to scikit-learn's array of cluster centres.
centroids = kmeans.cluster_centers_
# Plot the scikit-learn clustering.
#
# By this point `df` is the raw x/y/z frame (no 'closest' or 'color' columns)
# and `centroids` is the array of fitted cluster centres, so the hand-written
# update/assignment loop cannot be run on them. The fitted KMeans model has
# already converged, so its labels and centres are plotted directly.

# `labels` holds zero-based cluster indices while `colmap` is keyed from 1,
# hence the +1 offset.
colors = [colmap[int(label) + 1] for label in labels]

fig = plt.figure(figsize=(10, 10))
plt.scatter(principalDf['x'], principalDf['y'], color=colors, alpha=0.5, edgecolor='k')
# Row idx of `centroids` is the centre of the cluster labelled idx.
for idx, centroid in enumerate(centroids):
    plt.scatter(*centroid, color=colmap[idx + 1])
plt.xlim(-50, 50)
plt.ylim(-50, 50)
plt.show()