.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/demo_env/example_atari_breakout_vectorized_ppo.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_demo_env_example_atari_breakout_vectorized_ppo.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_demo_env_example_atari_breakout_vectorized_ppo.py:

==============================================================
A demo of ATARI Breakout environment with vectorized PPOAgent
==============================================================
Illustration of training and video rendering of a PPO agent in the ATARI
Breakout environment. The agent is only lightly tuned, not optimal; this
example is for illustration purposes.

.. video:: ../../example_plot_atari_breakout_vectorized_ppo.mp4
    :width: 600

.. GENERATED FROM PYTHON SOURCE LINES 14-146

.. code-block:: python3

    from rlberry.manager import ExperimentManager
    from datetime import datetime
    from rlberry_research.agents.torch import PPOAgent
    from gymnasium.wrappers.rendering import RecordVideo
    import shutil
    import os
    from rlberry.envs.gym_make import atari_make
    from rlberry_research.agents.torch.utils.training import model_factory_from_env

    initial_time = datetime.now()

    print("-------- init agent --------")

    policy_mlp_configs = {
        "type": "MultiLayerPerceptron",  # A network architecture
        "layer_sizes": [512],  # Network dimensions
        "reshape": False,
        "is_policy": True,  # The network should output a distribution
        # over actions
    }

    critic_mlp_configs = {
        "type": "MultiLayerPerceptron",
        "layer_sizes": [512],
        "reshape": False,
        "out_size": 1,  # The critic network is an approximator of
        # a value function V: States -> |R
    }

    policy_configs = {
        "type": "ConvolutionalNetwork",  # A network architecture
        "activation": "RELU",
        "in_channels": 4,
        "in_height": 84,
        "in_width": 84,
        "head_mlp_kwargs": policy_mlp_configs,
        "transpose_obs": False,
        "is_policy": True,  # The network should output a distribution
    }

    critic_configs = {
        "type": "ConvolutionalNetwork",
        "activation": "RELU",
        "in_channels": 4,
        "in_height": 84,
        "in_width": 84,
        "head_mlp_kwargs": critic_mlp_configs,
        "transpose_obs": False,
        "out_size": 1,
    }

    tuned_xp = ExperimentManager(
        PPOAgent,  # The Agent class.
        (
            atari_make,
            dict(id="ALE/Breakout-v5"),
        ),  # The Environment to solve.
        init_kwargs=dict(  # Where to put the agent's hyperparameters
            batch_size=256,
            optimizer_type="ADAM",  # What optimizer to use for policy gradient descent steps.
            learning_rate=2.5e-4,  # Size of the policy gradient descent steps.
            policy_net_fn=model_factory_from_env,  # A policy network constructor
            policy_net_kwargs=policy_configs,  # Policy network's architecture
            value_net_fn=model_factory_from_env,  # A critic network constructor
            value_net_kwargs=critic_configs,  # Critic network's architecture.
            n_envs=8,
            gamma=0.99,
            gae_lambda=0.95,
            clip_eps=0.1,
            k_epochs=4,
            n_steps=128,
        ),
        fit_budget=10_000_000,  # The number of interactions between the agent and the environment during training.
        eval_kwargs=dict(
            eval_horizon=500
        ),  # The number of interactions between the agent and the environment during evaluations.
        n_fit=1,  # The number of agents to train. Usually, it is good to do more than 1 because the training is stochastic.
        agent_name="PPO_tuned",  # The agent's name.
        output_dir="PPO_for_breakout",
    )

    print("-------- init agent : done!--------")
    print("-------- train agent --------")

    tuned_xp.fit()

    print("-------- train agent : done!--------")
    final_train_time = datetime.now()

    print("-------- test agent with video--------")

    env = atari_make("ALE/Breakout-v5", render_mode="rgb_array")
    env = RecordVideo(env, "_video/temp")

    if "render_modes" in env.metadata:
        env.metadata["render.modes"] = env.metadata[
            "render_modes"
        ]  # workaround: some 'gym' versions expect the old "render.modes" key

    observation, info = env.reset()
    for tt in range(30000):
        action = tuned_xp.get_agent_instances()[0].policy(observation)
        observation, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        if done:
            break
    env.close()

    print("-------- test agent with video : done!--------")
    final_test_time = datetime.now()

    tuned_xp.save()

    # need to move the final result inside the folder used for documentation
    os.rename(
        "_video/temp/rl-video-episode-0.mp4",
        "_video/example_plot_atari_breakout_vectorized_ppo.mp4",
    )
    shutil.rmtree("_video/temp/")

    print("Done!!!")
    print("-------------")
    print("begin run at :" + str(initial_time))
    print("end training at :" + str(final_train_time))
    print("end run at :" + str(final_test_time))
    print("-------------")
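Once the script above has run, the training curves recorded during ``fit()``
can be plotted with ``plot_writer_data`` from ``rlberry.manager``. The snippet
below is an illustrative sketch rather than part of the generated script: it
assumes the objects defined above are still in memory and that the agent's
writer logs an ``episode_rewards`` tag (adjust the tag to whatever the writer
actually records).

.. code-block:: python3

    from rlberry.manager import plot_writer_data

    # With n_envs=8 and n_steps=128, each PPO update consumes 8 * 128 = 1024
    # transitions, so a budget of 10_000_000 amounts to roughly 9,700 updates
    # (assuming fit_budget counts total environment steps across all envs).
    # "episode_rewards" is an assumed tag name; replace it with a tag that
    # actually appears in the writer data if needed.
    plot_writer_data(
        tuned_xp,
        tag="episode_rewards",
        title="PPO_tuned: episode rewards during training",
    )

Several managers trained with different hyperparameters can also be passed as
a list to compare their curves on the same axes.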
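The fitted manager can likewise be evaluated over a few Monte Carlo episodes
with ``evaluate_agents`` from ``rlberry.manager``. Again a sketch, assuming the
script above has run: ``n_simulations=10`` is an arbitrary choice here, and
the evaluation uses the ``eval_horizon`` set in ``eval_kwargs``.

.. code-block:: python3

    from rlberry.manager import evaluate_agents

    # Run evaluation episodes for the fitted manager and display a box plot
    # of the returns (one box per manager passed in the list).
    evaluate_agents([tuned_xp], n_simulations=10, show=True)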
output_dir="PPO_for_breakout", ) print("-------- init agent : done!--------") print("-------- train agent --------") tuned_xp.fit() print("-------- train agent : done!--------") final_train_time = datetime.now() print("-------- test agent with video--------") env = atari_make("ALE/Breakout-v5", render_mode="rgb_array") env = RecordVideo(env, "_video/temp") if "render_modes" in env.metadata: env.metadata["render.modes"] = env.metadata[ "render_modes" ] # bug with some 'gym' version observation, info = env.reset() for tt in range(30000): action = tuned_xp.get_agent_instances()[0].policy(observation) observation, reward, terminated, truncated, info = env.step(action) done = terminated or truncated if done: break env.close() print("-------- test agent with video : done!--------") final_test_time = datetime.now() tuned_xp.save() # need to move the final result inside the folder used for documentation os.rename( "_video/temp/rl-video-episode-0.mp4", "_video/example_plot_atari_breakout_vectorized_ppo.mp4", ) shutil.rmtree("_video/temp/") print("Done!!!") print("-------------") print("begin run at :" + str(initial_time)) print("end training at :" + str(final_train_time)) print("end run at :" + str(final_test_time)) print("-------------") .. rst-class:: sphx-glr-timing **Total running time of the script:** (0 minutes 0.000 seconds) .. _sphx_glr_download_auto_examples_demo_env_example_atari_breakout_vectorized_ppo.py: .. only:: html .. container:: sphx-glr-footer sphx-glr-footer-example .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: example_atari_breakout_vectorized_ppo.py ` .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: example_atari_breakout_vectorized_ppo.ipynb ` .. only:: html .. rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery `_