# Source code for rlberry_scool.envs.finite.chain

import numpy as np

from rlberry.envs.finite_mdp import FiniteMDP
from rlberry.rendering import RenderInterface2D, Scene, GeometricPrimitive


class Chain(RenderInterface2D, FiniteMDP):
    """
    Simple chain environment.

    States ``0 .. L-1`` are laid out on a line. Action 0 attempts to move
    left and action 1 attempts to move right; at either end of the chain the
    move that would leave the chain keeps the agent in place instead. With
    probability ``fail_prob`` the opposite action is executed.

    Reward 0.05 in initial state, reward 1.0 in final state.

    Parameters
    ----------
    L : int
        length of the chain (must be at least 2)
    fail_prob : double
        fail probability, i.e. probability that the opposite action is
        applied instead of the selected one
    """

    name = "Chain"

    def __init__(self, L=5, fail_prob=0.1):
        """
        Build the transition and reward tables and initialize both bases.

        Parameters
        ----------
        L : int
            length of the chain (must be at least 2)
        fail_prob : double
            fail probability
        """
        assert L >= 2, "Chain length L must be at least 2"
        self.L = L
        self.fail_prob = fail_prob

        # transition probabilities, indexed as P[state, action, next_state]
        P = np.zeros((L, 2, L))
        for ss in range(L):
            # Clamp the neighbours so that moving off either end of the chain
            # means "don't move". Since L >= 2, left and right always differ.
            left = max(ss - 1, 0)
            right = min(ss + 1, L - 1)

            # action 0 = left (or stay, in the first state)
            P[ss, 0, left] = 1.0 - fail_prob
            P[ss, 0, right] = fail_prob
            # action 1 = right (or stay, in the last state)
            P[ss, 1, right] = 1.0 - fail_prob
            P[ss, 1, left] = fail_prob

        # mean reward, indexed as R[state, action]
        S = L
        A = 2
        R = np.zeros((S, A))
        R[L - 1, :] = 1.0
        R[0, :] = 0.05

        # init base classes
        # NOTE(review): initial_state_distribution=0 presumably means the
        # environment always starts in state 0 - confirm against FiniteMDP.
        FiniteMDP.__init__(self, R, P, initial_state_distribution=0)
        RenderInterface2D.__init__(self)
        self.reward_range = (0.0, 1.0)

        # rendering info
        self.set_clipping_area((0, L, 0, 1))
        self.set_refresh_interval(100)  # in milliseconds

    def step(self, action):
        """
        Apply an action to the current state and move to the sampled state.

        Parameters
        ----------
        action : int
            action to execute; must belong to ``self._actions``

        Returns
        -------
        tuple
            ``(next_state, reward, terminated, truncated, info)`` exactly as
            returned by ``self.sample(self.state, action)``.
        """
        assert action in self._actions, "Invalid action!"

        # save state for rendering (the state before the transition)
        if self.is_render_enabled():
            self.append_state_for_rendering(self.state)

        # take step
        next_state, reward, terminated, truncated, info = self.sample(
            self.state, action
        )

        self.state = next_state
        return next_state, reward, terminated, truncated, info

    #
    # Code for rendering
    #

    def get_background(self):
        """
        Return a scene (list of shapes) representing the background.

        The background is made of ``L`` unit squares with alternating shades
        of gray (one per state) and a green triangle drawn in the last cell.
        """
        bg = Scene()
        colors = [(0.8, 0.8, 0.8), (0.9, 0.9, 0.9)]
        for ii in range(self.L):
            # unit square covering [ii, ii + 1] x [0, 1]
            shape = GeometricPrimitive("QUADS")
            shape.add_vertex((ii, 0))
            shape.add_vertex((ii + 1, 0))
            shape.add_vertex((ii + 1, 1))
            shape.add_vertex((ii, 1))
            shape.set_color(colors[ii % 2])
            bg.add_shape(shape)

        # triangle centered horizontally in the last cell
        flag = GeometricPrimitive("TRIANGLES")
        flag.set_color((0.0, 0.5, 0.0))
        x = self.L - 0.5
        y = 0.25
        flag.add_vertex((x, y))
        flag.add_vertex((x + 0.25, y + 0.5))
        flag.add_vertex((x - 0.25, y + 0.5))
        bg.add_shape(flag)

        return bg

    def get_scene(self, state):
        """
        Return scene (list of shapes) representing a given state.

        The agent is drawn as a cross (one vertical and one horizontal bar)
        centered in the cell of ``state``.

        Parameters
        ----------
        state : int
            index of the state to draw
        """
        scene = Scene()

        agent = GeometricPrimitive("QUADS")
        agent.set_color((0.75, 0.0, 0.5))

        size = 0.25  # half-length of each bar
        thickness = size / 4.0  # half-width of each bar
        x = state + 0.5  # center of the cell
        y = 0.5

        # vertical bar
        agent.add_vertex((x - thickness, y - size))
        agent.add_vertex((x + thickness, y - size))
        agent.add_vertex((x + thickness, y + size))
        agent.add_vertex((x - thickness, y + size))

        # horizontal bar
        agent.add_vertex((x - size, y - thickness))
        agent.add_vertex((x + size, y - thickness))
        agent.add_vertex((x + size, y + thickness))
        agent.add_vertex((x - size, y + thickness))

        scene.add_shape(agent)
        return scene