diff --git a/DSSE/environment/coverage_env.py b/DSSE/environment/coverage_env.py
index b934e03b..610c7407 100644
--- a/DSSE/environment/coverage_env.py
+++ b/DSSE/environment/coverage_env.py
@@ -12,9 +12,9 @@ class CoverageDroneSwarmSearch(DroneSwarmSearchBase):
     }
     reward_scheme = Reward(
         default=0,
-        leave_grid=-100,
+        leave_grid=-10,
         exceed_timestep=-100,
-        drones_collision=-100,
+        drones_collision=-10,
         search_cell=10,
         search_and_find=100,
     )
@@ -132,7 +132,7 @@ def step(self, actions: dict[str, int]) -> tuple:
             self.agents_positions[idx] = new_position
             new_x, new_y = new_position
             if new_position in self.not_seen_states:
-                reward_poc = 1 / (self.timestep) * prob_matrix[new_y, new_x] * 1_000
+                reward_poc = (1 / (self.timestep)) * prob_matrix[new_y, new_x] * 1_000
                 rewards[agent] = self.reward_scheme.search_cell + reward_poc
                 self.seen_states.add(new_position)
                 self.not_seen_states.remove(new_position)
diff --git a/DSSE/environment/env.py b/DSSE/environment/env.py
index 48119680..3e516b13 100644
--- a/DSSE/environment/env.py
+++ b/DSSE/environment/env.py
@@ -18,12 +18,12 @@ class DroneSwarmSearch(DroneSwarmSearchBase):
     }
 
     reward_scheme = Reward(
-        default=1,
-        leave_grid=-100_000,
-        exceed_timestep=-100_000,
-        drones_collision=-100_000,
+        default=0.1,
+        leave_grid=-200,
+        exceed_timestep=-200,
+        drones_collision=-200,
         search_cell=1,
-        search_and_find=100_000,
+        search_and_find=200,
     )
 
     def __init__(
@@ -262,9 +262,7 @@ def step(self, actions):
             elif is_searching:
                 prob_matrix = self.probability_matrix.get_matrix()
                 rewards[agent] = (
-                    prob_matrix[drone_y][drone_x] * 10000
-                    if prob_matrix[drone_y][drone_x] * 100 > 1
-                    else -100
+                    prob_matrix[drone_y][drone_x]
                 )
                 self.rewards_sum[agent] += rewards[agent]
 
diff --git a/DSSE/tests/test_env.py b/DSSE/tests/test_env.py
index e00bbb8c..bfd80754 100644
--- a/DSSE/tests/test_env.py
+++ b/DSSE/tests/test_env.py
@@ -161,7 +161,7 @@ def test_leave_grid_get_negative_reward(grid_size, person_initial_position):
 
     done = False
     reward_sum = 0
-    while not done and reward_sum >= -500_000:
+    while not done and reward_sum >= env.reward_scheme.leave_grid * (env.timestep_limit - 1):
         actions = {"drone0": Actions.UP.value}
         _, reward, terminations, done, _ = env.step(actions)
         done = any(done.values())
diff --git a/DSSE/tests/test_env_coverage.py b/DSSE/tests/test_env_coverage.py
index 70078b44..1c0b1c95 100644
--- a/DSSE/tests/test_env_coverage.py
+++ b/DSSE/tests/test_env_coverage.py
@@ -104,7 +104,7 @@ def test_leave_grid_get_negative_reward(grid_size):
 
     done = False
    reward_sum = 0
-    while not done and reward_sum >= -9_000:
+    while not done and reward_sum >= (env.reward_scheme.leave_grid * (env.timestep_limit-1)) +1:
         actions = {"drone0": Actions.UP.value}
         _, reward, terminations, done, _ = env.step(actions)
         done = any(done.values())
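
Note on the reshaped coverage reward, as a minimal standalone sketch (not part of the patch): the first-visit bonus now decays as 1/timestep, so covering a high-probability cell early pays more than covering it late, with the flat search_cell reward as the floor. The toy numbers below assume a uniform 5x5 probability matrix and a 1-indexed timestep, mirroring the expression in coverage_env.py above; SEARCH_CELL and first_visit_reward are illustrative names, not part of the DSSE API.

    import numpy as np

    SEARCH_CELL = 10                       # reward_scheme.search_cell after this change
    prob_matrix = np.full((5, 5), 1 / 25)  # toy uniform probability grid (sums to 1)

    def first_visit_reward(timestep: int, y: int, x: int) -> float:
        # Mirrors the patched line: the bonus shrinks as 1/timestep,
        # scaled by the cell's probability of containing the target.
        reward_poc = (1 / timestep) * prob_matrix[y, x] * 1_000
        return SEARCH_CELL + reward_poc

    for t in (1, 5, 50):
        print(t, round(first_visit_reward(t, 2, 2), 3))
    # 1 50.0 / 5 18.0 / 50 10.8 -- the bonus decays toward the flat search_cell reward

The tightened test guards follow the same logic: a drone that tries to leave the grid every step can accumulate at most leave_grid * (timestep_limit - 1) before the episode must terminate, so the while conditions now derive their cutoff from env.reward_scheme instead of hard-coded constants like -500_000 and -9_000.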