# Source code for rlberry.wrappers.rescale_reward

import numpy as np
from rlberry.envs import Wrapper


class RescaleRewardWrapper(Wrapper):
    """
    Wrapper that maps an environment's rewards into a bounded interval.

    Parameters
    ----------
    env :
        Environment to wrap.
    reward_range : tuple (double, double)
        Desired reward range after rescaling; must be bounded and non-empty.
    """

    def __init__(self, env, reward_range):
        Wrapper.__init__(self, env)
        self.reward_range = reward_range
        # the target interval must be non-empty ...
        assert reward_range[0] < reward_range[1]
        # ... and bounded on both sides
        assert reward_range[0] > -np.inf and reward_range[1] < np.inf

    def _linear_rescaling(self, x, x0, x1, u0, u1):
        """Map a value x in [x0, x1] linearly onto [u0, u1]."""
        slope = (u1 - u0) / (x1 - x0)
        intercept = (x1 * u0 - x0 * u1) / (x1 - x0)
        return slope * x + intercept

    def _rescale(self, reward):
        """Rescale a reward from the wrapped env's range to self.reward_range."""
        src_lo, src_hi = self.env.reward_range
        dst_lo, dst_hi = self.reward_range

        bounded_below = src_lo > -np.inf
        bounded_above = src_hi < np.inf

        if bounded_below and bounded_above:
            # finite source range: a direct linear map suffices
            return self._linear_rescaling(reward, src_lo, src_hi, dst_lo, dst_hi)

        if bounded_below:
            # source is [src_lo, +inf): shift to [0, inf), squash into [0, 1)
            shifted = reward - src_lo
            squashed = 2.0 / (1.0 + np.exp(-shifted)) - 1.0
        elif bounded_above:
            # source is (-inf, src_hi]: shift to (-inf, 0], squash into (0, 1]
            shifted = reward - src_hi
            squashed = 2.0 / (1.0 + np.exp(-shifted))
        else:
            # source is fully unbounded: plain sigmoid into (0, 1)
            squashed = 1.0 / (1.0 + np.exp(-reward))

        # all unbounded cases end with a linear map from [0, 1] to the target
        return self._linear_rescaling(squashed, 0.0, 1.0, dst_lo, dst_hi)

    def step(self, action):
        """Step the wrapped environment, returning the rescaled reward."""
        observation, reward, terminated, truncated, info = self.env.step(action)
        return observation, self._rescale(reward), terminated, truncated, info

    def sample(self, state, action):
        """Sample a transition from (state, action), returning the rescaled reward."""
        observation, reward, terminated, truncated, info = self.env.sample(
            state, action
        )
        return observation, self._rescale(reward), terminated, truncated, info