% Conference paper: K-Means-Lite, in the ISCMI 2018 proceedings (pp. 54--59).
% NOTE(review): month/day give 2 Jul 2018, which precedes the conference dates
%   kept in the note field (21--22 Nov 2018) -- confirm against the publisher record.
% NOTE(review): address holds a country rather than a publisher city -- confirm or replace.
@inproceedings{f8c8bd78a0d94a17baaf90f3901ae04d,
  author    = {Olukanmi, Peter O. and Nelwamondo, Fulufhelo and Marwala, Tshilidzi},
  title     = {K-Means-Lite: Real Time Clustering for Large Datasets},
  booktitle = {5th International Conference on Soft Computing and Machine Intelligence, {ISCMI} 2018},
  pages     = {54--59},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2018},
  month     = jul,
  day       = {2},
  doi       = {10.1109/ISCMI.2018.8703210},
  note      = {Conference dates: 21--22 November 2018},
  language  = {English},
  keywords  = {accurate, clustering, efficient, k-means, real time, scalable},
  abstract  = {We present a simple algorithm to address the poor scalability of k-means, arguably the most popular clustering algorithm. Our algorithm, named k-means-lite, is based on an intuitive extension of the classical central limit theorem. It obtains the k centroids which k-means seeks, by making inference from a few small samples, rather than by repeated exhaustive comparison of data points and centroids. Experiments show that, compared to k-means, k-means-lite achieves drastic efficiency gain, and solves large datasets (up to 1 million points tested) in real time. The efficiency gain is increasingly manifest as data size and number of clusters increase. Interestingly, k-means-lite also produces better clustering quality than k-means on the largest 7 of 10 datasets tested.},
}