.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/demo_env/video_plot_atari_freeway.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_demo_env_video_plot_atari_freeway.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_demo_env_video_plot_atari_freeway.py:


=================================================
A demo of ATARI Freeway environment with DQNAgent
=================================================
Illustration of training a DQN agent in the ATARI Freeway environment and
rendering a video of the trained agent.

The agent is lightly tuned but not optimal; this example is for illustration purposes only.

.. video:: ../../video_plot_atari_freeway.mp4
   :width: 600

.. GENERATED FROM PYTHON SOURCE LINES 14-119

.. code-block:: python3


    from rlberry.manager import ExperimentManager
    from datetime import datetime
    from rlberry_research.agents.torch.dqn.dqn import DQNAgent
    from gymnasium.wrappers.rendering import RecordVideo
    import shutil
    import os
    from rlberry.envs.gym_make import atari_make


    # Timestamp taken before any setup; it is printed in the final summary.
    initial_time = datetime.now()
    print("-------- init agent --------")

    # Settings for the fully-connected head that the convolutional network
    # receives through "head_mlp_kwargs".
    # NOTE(review): is_policy=False presumably means the network returns raw
    # values rather than a distribution over actions -- confirm against the
    # model factory.
    mlp_configs = dict(
        type="MultiLayerPerceptron",  # Network architecture
        layer_sizes=[512],  # Hidden layer widths
        reshape=False,
        is_policy=False,
    )

    # Settings for the convolutional Q-network.
    # NOTE(review): 4 x 84 x 84 presumably matches the stacked, resized frames
    # produced by atari_make -- confirm.
    cnn_configs = dict(
        type="ConvolutionalNetwork",  # Network architecture
        activation="RELU",
        in_channels=4,
        in_height=84,
        in_width=84,
        head_mlp_kwargs=mlp_configs,
        transpose_obs=False,
        is_policy=False,
    )

    # The environment to solve, given as a (constructor, kwargs) pair.
    freeway_env_spec = (atari_make, {"id": "ALE/Freeway-v5"})

    # The agent's hyperparameters, forwarded through ``init_kwargs``.
    dqn_hyperparams = {
        "q_net_constructor": "rlberry_research.agents.torch.utils.training.model_factory_from_env",
        "q_net_kwargs": cnn_configs,
        "max_replay_size": 50000,
        "batch_size": 32,
        "learning_starts": 25000,
        "gradient_steps": 1,
        "epsilon_final": 0.01,
        "learning_rate": 1e-4,  # Step size of the gradient updates.
        "chunk_size": 1,
    }

    tuned_xp = ExperimentManager(
        DQNAgent,  # The Agent class.
        freeway_env_spec,
        init_kwargs=dqn_hyperparams,
        # The number of interactions between the agent and the environment
        # during training.
        fit_budget=90000,
        # The number of interactions between the agent and the environment
        # during evaluations.
        eval_kwargs={"eval_horizon": 500},
        # The number of agents to train. Usually, it is good to do more than 1
        # because the training is stochastic.
        n_fit=1,
        agent_name="DQN_tuned",  # The agent's name.
        output_dir="DQN_for_freeway",
    )

    print("-------- init agent : done!--------")
    print("-------- train agent --------")

    tuned_xp.fit()

    print("-------- train agent : done!--------")

    # Timestamp marking the end of training, printed in the final summary.
    final_train_time = datetime.now()

    print("-------- test agent with video--------")

    # Fresh environment that renders RGB frames, wrapped so the episode is
    # recorded into a temporary folder.
    env = atari_make("ALE/Freeway-v5", render_mode="rgb_array")
    env = RecordVideo(env, "_video/temp")

    if "render_modes" in env.metadata:
        env.metadata["render.modes"] = env.metadata[
            "render_modes"
        ]  # bug with some 'gym' version

    try:
        # Fetch the trained agent once: the lookup does not depend on the loop
        # state, so there is no reason to repeat it at every step.
        trained_agent = tuned_xp.get_agent_instances()[0]

        observation, info = env.reset()
        # Run at most 30000 steps, stopping early when the episode ends.
        for _ in range(30000):
            action = trained_agent.policy(observation)
            observation, reward, terminated, truncated, info = env.step(action)
            if terminated or truncated:
                break
    finally:
        # Always close the wrapped environment, even if the rollout fails, so
        # the recorder is not left open.
        env.close()

    print("-------- test agent with video : done!--------")
    final_test_time = datetime.now()
    tuned_xp.save()

    # The documentation expects the video directly under "_video/", so move the
    # recorded episode out of the temporary folder and then delete that folder.
    os.rename("_video/temp/rl-video-episode-0.mp4", "_video/video_plot_atari_freeway.mp4")
    shutil.rmtree("_video/temp/")


    print("Done!!!")
    print("-------------")
    # Timing summary: start of the run, end of training, end of the video test.
    for label, moment in (
        ("begin run at :", initial_time),
        ("end training at :", final_train_time),
        ("end run at :", final_test_time),
    ):
        print(label + str(moment))
    print("-------------")


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (0 minutes 0.000 seconds)


.. _sphx_glr_download_auto_examples_demo_env_video_plot_atari_freeway.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example


    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: video_plot_atari_freeway.py <video_plot_atari_freeway.py>`

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: video_plot_atari_freeway.ipynb <video_plot_atari_freeway.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_