.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/demo_bandits/plot_exp3_bandit.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_demo_bandits_plot_exp3_bandit.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_demo_bandits_plot_exp3_bandit.py:


=============================
EXP3 Bandit cumulative regret
=============================

This script shows how to define an adversarial bandit environment and an EXP3
randomized algorithm, and compares it with a Bernoulli Thompson sampling agent.

.. GENERATED FROM PYTHON SOURCE LINES 9-120


.. image-sg:: /auto_examples/demo_bandits/images/sphx_glr_plot_exp3_bandit_001.png
   :alt: Cumulative Pseudo-Regret
   :srcset: /auto_examples/demo_bandits/images/sphx_glr_plot_exp3_bandit_001.png
   :class: sphx-glr-single-img


.. code-block:: python3


    import numpy as np

    from rlberry_research.envs.bandits import AdversarialBandit
    from rlberry_research.agents.bandits import (
        RandomizedAgent,
        TSAgent,
        makeEXP3Index,
        makeBetaPrior,
    )
    from rlberry.manager import ExperimentManager, plot_writer_data


    # Agents definition


    class EXP3Agent(RandomizedAgent):
        name = "EXP3"

        def __init__(self, env, **kwargs):
            prob, tracker_params = makeEXP3Index()
            RandomizedAgent.__init__(
                self,
                env,
                prob,
                writer_extra="action",  # log chosen actions to compute the regret afterwards
                tracker_params=tracker_params,
                **kwargs
            )


    class BernoulliTSAgent(TSAgent):
        """Thompson sampling for Bernoulli bandits."""

        name = "TS"

        def __init__(self, env, **kwargs):
            prior, _ = makeBetaPrior()
            TSAgent.__init__(self, env, prior, writer_extra="action", **kwargs)


    # Parameters of the problem
    T = 3000  # Horizon
    M = 20  # number of Monte Carlo simulations


    def switching_rewards(T, gap=0.1, rate=1.6):
        """Adversarially switching rewards over exponentially long phases.

        Inspired by Zimmert, Julian, and Yevgeny Seldin. "Tsallis-INF: An Optimal
        Algorithm for Stochastic and Adversarial Bandits." J. Mach. Learn. Res. 22
        (2021): 28-1.
        """
        rewards = np.zeros((T, 2))
        exp = 1
        high_rewards = True
        for t in range(T):
            # Switch which arm is optimal at the end of each (exponentially long) phase.
            if t > np.floor(rate**exp):
                high_rewards = not high_rewards
                exp += 1
            if high_rewards:
                rewards[t] = [1.0 - gap, 1.0]
            else:
                rewards[t] = [0.0, gap]
        return rewards


    rewards = switching_rewards(T, rate=5.0)


    # Construction of the experiment

    env_ctor = AdversarialBandit
    env_kwargs = {"rewards": rewards}

    Agents_class = [EXP3Agent, BernoulliTSAgent]

    agents = [
        ExperimentManager(
            Agent,
            (env_ctor, env_kwargs),
            init_kwargs={},
            fit_budget=T,
            n_fit=M,
            parallelization="process",
            mp_context="fork",
        )
        for Agent in Agents_class
    ]
    # These parameters should enable parallel computation even in notebooks.


    # Agent training
    for agent in agents:
        agent.fit()


    # Compute and plot the (pseudo-)regret
    def compute_pseudo_regret(actions):
        selected_rewards = np.array(
            [rewards[t, int(action)] for t, action in enumerate(actions)]
        )
        return np.cumsum(np.max(rewards, axis=1) - selected_rewards)


    output = plot_writer_data(
        agents,
        tag="action",
        preprocess_func=compute_pseudo_regret,
        title="Cumulative Pseudo-Regret",
    )


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 7.492 seconds)
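
As a point of reference, the curve plotted above is the cumulative pseudo-regret
computed by ``compute_pseudo_regret``: at each round, the reward of the selected
arm is compared to the best arm of that round,

.. math::

   \widehat{R}_T = \sum_{t=1}^{T} \Bigl( \max_{a} r_t(a) - r_t(a_t) \Bigr),

where :math:`r_t(a)` is the reward of arm :math:`a` at round :math:`t` and
:math:`a_t` is the arm chosen by the agent. For context, the textbook EXP3
sampling distribution keeps exponential weights over importance-weighted reward
estimates,

.. math::

   p_t(a) \propto \exp\Bigl( \eta \sum_{s < t} \hat{r}_s(a) \Bigr),
   \qquad
   \hat{r}_s(a) = \frac{r_s(a) \, \mathbf{1}\{a_s = a\}}{p_s(a)},

with learning rate :math:`\eta > 0`. This is only a sketch of the classical
algorithm: the index built by ``makeEXP3Index`` may use a different learning-rate
schedule or exploration term.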

.. _sphx_glr_download_auto_examples_demo_bandits_plot_exp3_bandit.py:

.. only:: html

    .. container:: sphx-glr-footer sphx-glr-footer-example

        .. container:: sphx-glr-download sphx-glr-download-python

            :download:`Download Python source code: plot_exp3_bandit.py <plot_exp3_bandit.py>`

        .. container:: sphx-glr-download sphx-glr-download-jupyter

            :download:`Download Jupyter notebook: plot_exp3_bandit.ipynb <plot_exp3_bandit.ipynb>`

.. only:: html

    .. rst-class:: sphx-glr-signature

        `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_