.. DO NOT EDIT. .. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. .. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: .. "auto_examples/example_venv.py" .. LINE NUMBERS ARE GIVEN BELOW. .. only:: html .. note:: :class: sphx-glr-download-link-note :ref:`Go to the end ` to download the full example code .. rst-class:: sphx-glr-example-title .. _sphx_glr_auto_examples_example_venv.py: ================================================ Using multiple virtual environments with rlberry ================================================ This example illustrates how to use the "with_venv" decorator in order to automatically construct and use virtual environments for RL experimentation with several separate environments. The decorator `with_venv` is used to generate scripts at compile time, which are then run via `run_venv_xp`. Remark: the functions 'run_sb' and 'run_mushroom' are not directly called and are only there to give the script's text. .. GENERATED FROM PYTHON SOURCE LINES 15-101 .. code-block:: python3 from rlberry.manager import with_venv, run_venv_xp # Decorator with_venv will create a script to be run in the virtual environment with # the libraries in the import_libs list. Here we want to create a virtual environment # containing mushroom_rl library and run an example script taken from mushroom_rl doc. @with_venv(import_libs=["numpy", "mushroom_rl"], venv_dir_name="rlberry_venvs") def run_mushroom(): """ Simple script to solve a simple chain with Q-Learning. 
""" import numpy as np from mushroom_rl.algorithms.value import QLearning from mushroom_rl.core import Core, Logger from mushroom_rl.environments import generate_simple_chain from mushroom_rl.policy import EpsGreedy from mushroom_rl.utils.parameters import Parameter from mushroom_rl.utils.dataset import compute_J np.random.seed() logger = Logger(QLearning.__name__, results_dir=None) logger.strong_line() logger.info("Experiment Algorithm: " + QLearning.__name__) # MDP mdp = generate_simple_chain(state_n=5, goal_states=[2], prob=0.8, rew=1, gamma=0.9) # Policy epsilon = Parameter(value=0.15) pi = EpsGreedy(epsilon=epsilon) # Agent learning_rate = Parameter(value=0.2) algorithm_params = dict(learning_rate=learning_rate) agent = QLearning(mdp.info, pi, **algorithm_params) # Core core = Core(agent, mdp) # Initial policy Evaluation dataset = core.evaluate(n_steps=1000) J = np.mean(compute_J(dataset, mdp.info.gamma)) logger.info(f"J start: {J}") # Train core.learn(n_steps=10000, n_steps_per_fit=1) # Final Policy Evaluation dataset = core.evaluate(n_steps=1000) J = np.mean(compute_J(dataset, mdp.info.gamma)) logger.info(f"J final: {J}") # Here we want to create a virtual environment containing stable-baselines3 library # and run an example script taken from stable-baselines3 doc. @with_venv( import_libs=["stable-baselines3"], venv_dir_name="rlberry_venvs", python_ver="3.9" ) def run_sb(): import gymnasium as gym from stable_baselines3 import A2C env = gym.make("CartPole-v1") model = A2C("MlpPolicy", env, verbose=1) model.learn(total_timesteps=1_500) vec_env = model.get_env() obs = vec_env.reset() cum_reward = 0 for i in range(1000): action, _state = model.predict(obs, deterministic=True) obs, reward, done, info = vec_env.step(action) cum_reward += reward print(cum_reward) if __name__ == "__main__": # Collect all the scripts from the directory rlberry_venvs and tun them. run_venv_xp(venv_dir_name="rlberry_venvs") .. 
rst-class:: sphx-glr-timing **Total running time of the script:** (0 minutes 0.000 seconds) .. _sphx_glr_download_auto_examples_example_venv.py: .. only:: html .. container:: sphx-glr-footer sphx-glr-footer-example .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: example_venv.py ` .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: example_venv.ipynb ` .. only:: html .. rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery `_