.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/comparison_agents.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_comparison_agents.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_comparison_agents.py:


=========================
Compare Bandit Algorithms
=========================

This example illustrates the use of compare_agents, a function that uses
multiple-testing to assess whether trained agents are statistically different
or not.

Remark that in the case where two agents are not deemed statistically
different it can mean either that they are as efficient, or it can mean that
there have not been enough fits to assess the variability of the agents.

.. GENERATED FROM PYTHON SOURCE LINES 13-109

.. code-block:: python3

    import numpy as np

    from rlberry.manager.comparison import compare_agents
    from rlberry.manager import AgentManager
    from rlberry_research.envs.bandits import BernoulliBandit
    from rlberry_research.agents.bandits import (
        IndexAgent,
        makeBoundedMOSSIndex,
        makeBoundedNPTSIndex,
        makeBoundedUCBIndex,
        makeETCIndex,
    )

    # Parameters of the problem
    means = np.array([0.6, 0.6, 0.6, 0.9])  # means of the arms
    A = len(means)
    T = 2000  # Horizon
    N = 50  # number of fits

    # Construction of the experiment
    env_ctor = BernoulliBandit
    env_kwargs = {"p": means}


    class UCBAgent(IndexAgent):
        name = "UCB"

        def __init__(self, env, **kwargs):
            index, _ = makeBoundedUCBIndex()
            IndexAgent.__init__(self, env, index, writer_extra="reward", **kwargs)


    class ETCAgent(IndexAgent):
        name = "ETC"

        def __init__(self, env, m=20, **kwargs):
            index, _ = makeETCIndex(A, m)
            IndexAgent.__init__(
                self, env, index, writer_extra="action_and_reward", **kwargs
            )


    class MOSSAgent(IndexAgent):
        name = "MOSS"

        def __init__(self, env, **kwargs):
            index, _ = makeBoundedMOSSIndex(T, A)
            IndexAgent.__init__(
                self, env, index, writer_extra="action_and_reward", **kwargs
            )


    class NPTSAgent(IndexAgent):
        name = "NPTS"

        def __init__(self, env, **kwargs):
            index, tracker_params = makeBoundedNPTSIndex()
            IndexAgent.__init__(
                self,
                env,
                index,
                writer_extra="reward",
                tracker_params=tracker_params,
                **kwargs,
            )


    Agents_class = [MOSSAgent, NPTSAgent, UCBAgent, ETCAgent]

    managers = [
        AgentManager(
            Agent,
            train_env=(env_ctor, env_kwargs),
            fit_budget=T,
            parallelization="process",
            mp_context="fork",
            n_fit=N,
        )
        for Agent in Agents_class
    ]

    for manager in managers:
        manager.fit()


    def eval_function(manager, eval_budget=None, agent_id=0):
        # Pseudo-regret of one fitted agent: optimal cumulative reward (T * best
        # mean) minus the rewards logged by the writer under the "reward" tag.
        df = manager.get_writer_data()[agent_id]
        return T * np.max(means) - np.sum(df.loc[df["tag"] == "reward", "value"])


    print(
        compare_agents(managers, method="tukey_hsd", eval_function=eval_function, B=10_000)
    )


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 0.000 seconds)


.. _sphx_glr_download_auto_examples_comparison_agents.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: comparison_agents.py <comparison_agents.py>`

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: comparison_agents.ipynb <comparison_agents.ipynb>`

.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_