% Workshop paper from IWACI 2009 (see the note field), published in the
% "Advances in Intelligent and Soft Computing" book series.
% The citation key is an auto-generated export identifier; it is kept unchanged
% so that existing \cite commands continue to resolve.
@inproceedings{a3651da517e34ef0abaf2cfb4e7e7230,
  author    = {Pantanowitz, Adam and Marwala, Tshilidzi},
  title     = {Missing Data Imputation Through the Use of the Random Forest Algorithm},
  booktitle = {Advances in Computational Intelligence},
  series    = {Advances in Intelligent and Soft Computing},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2009},
  pages     = {53--62},
  doi       = {10.1007/978-3-642-03156-4_6},
  isbn      = {9783642031557},
  language  = {English},
  keywords  = {Auto-associative, Imputation, Missing data, Neural network, Random forest},
  abstract  = {This paper presents a comparison of different paradigms used for missing data imputation. The data set used is HIV seroprevalence data from an antenatal clinic study survey performed in 2001. Data imputation is performed through five methods: Random Forests; auto-associative neural networks with genetic algorithms; auto-associative neuro-fuzzy configurations; and two random forest and neural network based hybrids. Results indicate that Random Forests are superior in imputing missing data for the given data set in terms of accuracy and in terms of computation time, with accuracy increases of up to 32\% on average for certain variables when compared with auto-associative networks. While the concept of hybrid systems has promise, the presented systems appear to be hindered by their auto-associative neural network components.},
  note      = {2nd International Workshop on Advanced Computational Intelligence, IWACI 2009 ; Conference date: 22-06-2009 Through 23-06-2009},
}