.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/comparison_agents.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_comparison_agents.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_comparison_agents.py:


=========================
Compare Bandit Algorithms
=========================

This example illustrates the use of compare_agents, a function that uses
multiple-testing to assess whether trained agents are statistically different
or not.

Remark that in the case where two agents are not deemed statistically
different it can mean either that they are as efficient, or it can mean that
there have not been enough fits to assess the variability of the agents.

.. GENERATED FROM PYTHON SOURCE LINES 13-109

.. code-block:: python3

    import numpy as np

    from rlberry.manager.comparison import compare_agents
    from rlberry.manager import AgentManager
    from rlberry_research.envs.bandits import BernoulliBandit
    from rlberry_research.agents.bandits import (
        IndexAgent,
        makeBoundedMOSSIndex,
        makeBoundedNPTSIndex,
        makeBoundedUCBIndex,
        makeETCIndex,
    )

    # Parameters of the problem
    means = np.array([0.6, 0.6, 0.6, 0.9])  # means of the arms
    A = len(means)
    T = 2000  # Horizon
    N = 50  # number of fits

    # Construction of the experiment
    env_ctor = BernoulliBandit
    env_kwargs = {"p": means}


    class UCBAgent(IndexAgent):
        name = "UCB"

        def __init__(self, env, **kwargs):
            index, _ = makeBoundedUCBIndex()
            IndexAgent.__init__(self, env, index, writer_extra="reward", **kwargs)


    class ETCAgent(IndexAgent):
        name = "ETC"

        def __init__(self, env, m=20, **kwargs):
            index, _ = makeETCIndex(A, m)
            IndexAgent.__init__(
                self, env, index, writer_extra="action_and_reward", **kwargs
            )


    class MOSSAgent(IndexAgent):
        name = "MOSS"

        def __init__(self, env, **kwargs):
            index, _ = makeBoundedMOSSIndex(T, A)
            IndexAgent.__init__(
                self, env, index, writer_extra="action_and_reward", **kwargs
            )


    class NPTSAgent(IndexAgent):
        name = "NPTS"

        def __init__(self, env, **kwargs):
            index, tracker_params = makeBoundedNPTSIndex()
            IndexAgent.__init__(
                self,
                env,
                index,
                writer_extra="reward",
                tracker_params=tracker_params,
                **kwargs,
            )


    Agents_class = [MOSSAgent, NPTSAgent, UCBAgent, ETCAgent]

    managers = [
        AgentManager(
            Agent,
            train_env=(env_ctor, env_kwargs),
            fit_budget=T,
            parallelization="process",
            mp_context="fork",
            n_fit=N,
        )
        for Agent in Agents_class
    ]

    for manager in managers:
        manager.fit()


    def eval_function(manager, eval_budget=None, agent_id=0):
        # Pseudo-regret of one fitted agent: optimal cumulative reward (T * best
        # mean) minus the rewards logged by the writer under the "reward" tag.
        df = manager.get_writer_data()[agent_id]
        return T * np.max(means) - np.sum(df.loc[df["tag"] == "reward", "value"])


    print(
        compare_agents(managers, method="tukey_hsd", eval_function=eval_function, B=10_000)
    )


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 0.000 seconds)


.. _sphx_glr_download_auto_examples_comparison_agents.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: comparison_agents.py <comparison_agents.py>`

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: comparison_agents.ipynb <comparison_agents.ipynb>`

.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_