import numpy as np
from rlberry.envs.finite_mdp import FiniteMDP
from rlberry.rendering import RenderInterface2D, Scene, GeometricPrimitive
class Chain(RenderInterface2D, FiniteMDP):
    """
    Simple chain environment.

    States are the integers ``0, ..., L - 1`` laid out on a line.
    Action 0 moves one state to the left and action 1 moves one state to
    the right; at either end of the chain, the move that would leave the
    chain keeps the agent in place instead. With probability ``fail_prob``
    the outcome of the *other* action is applied.

    Reward 0.05 in initial state, reward 1.0 in final state.

    Parameters
    ----------
    L : int
        length of the chain (must be at least 2)
    fail_prob : double
        fail probability
    """

    name = "Chain"

    def __init__(self, L=5, fail_prob=0.1):
        assert L >= 2
        self.L = L
        self.fail_prob = fail_prob

        # transition probabilities, indexed as P[state, action, next_state]
        P = np.zeros((L, 2, L))
        success_prob = 1.0 - fail_prob
        for ss in range(L):
            # Clamping the neighbours to the chain covers the two boundary
            # states ("don't move" instead of stepping outside) with the
            # same rule as the interior states. Since L >= 2, the two
            # neighbours are always distinct.
            left = max(ss - 1, 0)
            right = min(ss + 1, L - 1)
            P[ss, 0, left] = success_prob  # action 0 = left
            P[ss, 0, right] = fail_prob
            P[ss, 1, right] = success_prob  # action 1 = right
            P[ss, 1, left] = fail_prob

        # mean reward, indexed as R[state, action]
        S = L
        A = 2
        R = np.zeros((S, A))
        R[L - 1, :] = 1.0
        R[0, :] = 0.05

        # init base classes
        FiniteMDP.__init__(self, R, P, initial_state_distribution=0)
        RenderInterface2D.__init__(self)
        self.reward_range = (0.0, 1.0)

        # rendering info
        self.set_clipping_area((0, L, 0, 1))
        self.set_refresh_interval(100)  # in milliseconds

    def step(self, action):
        """
        Apply ``action`` in the current state and move to the sampled state.

        Parameters
        ----------
        action : int
            action to execute; must belong to ``self._actions``

        Returns
        -------
        tuple
            ``(next_state, reward, terminated, truncated, info)`` exactly as
            produced by ``self.sample(self.state, action)``.
        """
        assert action in self._actions, "Invalid action!"

        # save state for rendering
        if self.is_render_enabled():
            self.append_state_for_rendering(self.state)

        # take step
        next_state, reward, terminated, truncated, info = self.sample(
            self.state, action
        )
        self.state = next_state
        return next_state, reward, terminated, truncated, info

    #
    # Code for rendering
    #

    def get_background(self):
        """
        Return a scene (list of shapes) representing the background
        """
        bg = Scene()

        # one unit square per state, alternating between two shades of grey
        colors = [(0.8, 0.8, 0.8), (0.9, 0.9, 0.9)]
        for ii in range(self.L):
            shape = GeometricPrimitive("QUADS")
            shape.add_vertex((ii, 0))
            shape.add_vertex((ii + 1, 0))
            shape.add_vertex((ii + 1, 1))
            shape.add_vertex((ii, 1))
            shape.set_color(colors[ii % 2])
            bg.add_shape(shape)

        # green triangle centred horizontally in the last cell of the chain
        flag = GeometricPrimitive("TRIANGLES")
        flag.set_color((0.0, 0.5, 0.0))
        x = self.L - 0.5
        y = 0.25
        flag.add_vertex((x, y))
        flag.add_vertex((x + 0.25, y + 0.5))
        flag.add_vertex((x - 0.25, y + 0.5))
        bg.add_shape(flag)

        return bg

    def get_scene(self, state):
        """
        Return scene (list of shapes) representing a given state
        """
        scene = Scene()

        agent = GeometricPrimitive("QUADS")
        agent.set_color((0.75, 0.0, 0.5))

        size = 0.25
        x = state + 0.5  # horizontal centre of the cell of `state`
        y = 0.5

        # Two overlapping quads forming a cross: a narrow vertical bar
        # followed by a flat horizontal bar, both centred on (x, y).
        vertices = [
            (x - size / 4.0, y - size),
            (x + size / 4.0, y - size),
            (x + size / 4.0, y + size),
            (x - size / 4.0, y + size),
            (x - size, y - size / 4.0),
            (x + size, y - size / 4.0),
            (x + size, y + size / 4.0),
            (x - size, y + size / 4.0),
        ]
        for vertex in vertices:
            agent.add_vertex(vertex)

        scene.add_shape(agent)
        return scene