% Conference paper in the "Communications in Computer and Information Science" series.
% Author and editor names use the unambiguous "von Last, First" form with no escaped
% braces; the DOI is stored bare (no LaTeX escapes) so url-aware styles can link it.
% NOTE(review): address holds a country, not the publisher's city -- confirm and replace.
@inproceedings{6c85bc98ae684ae59500b7207a7d47b1,
  author    = {Mdluli, Banele and van Zyl, Terence L.},
  title     = {Modifying Class Distributions to Improve the Classification of Minority Group Examples in a Class-Imbalanced Dataset},
  booktitle = {Artificial Intelligence Research - 6th Southern African Conference, SACAIR 2025, Proceedings},
  editor    = {Gerber, Aurona and Pillay, Anban W.},
  series    = {Communications in Computer and Information Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2026},
  pages     = {112--126},
  doi       = {10.1007/978-3-032-11733-5_7},
  isbn      = {9783032117328},
  language  = {English},
  keywords  = {ADASYN, Class imbalance, Classification algorithm, Oversampling, Within-class variation},
  abstract  = {Class-imbalanced datasets are a common occurrence in real-world applications. The imbalance between minority and majority classes exists due to the over-representation of one class compared to another in a dataset. The class imbalance might reflect a system{\textquoteright}s behaviour over time. However, the class imbalance causes sub-optimal performance for machine learning models that predict the system{\textquoteright}s future behaviour. Various techniques are used to reduce the negative impact of class-imbalanced datasets on machine learning models. Data resampling techniques are one of the main techniques, and the subdivisions of data resampling techniques include oversampling and undersampling. Oversampling techniques have outperformed undersampling techniques in most studies, and most data resampling techniques are derived from oversampling. However, some oversampling techniques are ineffective when used on minority-class datasets that lack within-class variation and have a high-class imbalance. In this study, an analysis was performed to understand the changes in within-class variation before and after oversampling for nine datasets. Additionally, classification performance was measured for standard and hybrid oversampled datasets. A novel hybrid oversampling technique that uses k-Means and ADASYN was implemented. Hybrid oversampling techniques generated synthetic examples that marginally changed the within-class variation and had the highest F1 score compared to standard oversampling techniques across nine datasets.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2026.; 6th Southern African Conference for Artificial Intelligence Research, SACAIR 2025 ; Conference date: 01-12-2025 Through 05-12-2025},
}