% Conference paper by Hurwitz and Marwala presented at GAME-ON 2005. The citation key (apparently an auto-generated export identifier) is kept unchanged so existing \cite commands keep resolving. The conference name lives only in booktitle; the note carries just the conference dates.
@inproceedings{cab1d4742d2f46d8bff9fe0cb2e7c5d6,
  author    = {Hurwitz, Evan and Marwala, Tshilidzi},
  title     = {Optimising reinforcement learning for neural networks},
  booktitle = {6th International Conference on Intelligent Games and Simulation, {GAME-ON} 2005},
  pages     = {13--18},
  year      = {2005},
  isbn      = {9077381236},
  language  = {English},
  keywords  = {Difference, Learning, Network, Neural, Reinforcement, Temporal, Tic-tac-toe},
  abstract  = {Reinforcement learning traditionally utilises binary encoders and/or linear function approximators to accomplish its Artificial Intelligence goals. The use of nonlinear function approximators such as neural networks is often shunned, due to excessive difficulties in implementation, usually resulting from stability issues. In this paper the implementation of reinforcement learning for training a neural network is examined, being applied to the problem of learning to play Tic Tac Toe. Methods of ensuring stability are examined, and differing training methodologies are compared in order to optimise the reinforcement learning of the system. TD(1) methods are compared with database methods, as well as a hybridised system that combines the two, which outperforms all of the homogenous systems.},
  note      = {Conference date: 24--25 November 2005},
}