Skip to content

Commit

Permalink
Merge pull request #228 from pfeinsper/new_reward_funcs
Browse files (browse the repository at this point in the history)
New reward functions
  • Loading branch information
JorasOliveira authored Apr 25, 2024
2 parents c0dd584 + c4de36e commit 78176bb
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 13 deletions.
6 changes: 3 additions & 3 deletions DSSE/environment/coverage_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ class CoverageDroneSwarmSearch(DroneSwarmSearchBase):
}
reward_scheme = Reward(
default=0,
- leave_grid=-100,
+ leave_grid=-10,
exceed_timestep=-100,
- drones_collision=-100,
+ drones_collision=-10,
search_cell=10,
search_and_find=100,
)
Expand Down Expand Up @@ -132,7 +132,7 @@ def step(self, actions: dict[str, int]) -> tuple:
self.agents_positions[idx] = new_position
new_x, new_y = new_position
if new_position in self.not_seen_states:
- reward_poc = 1 / (self.timestep) * prob_matrix[new_y, new_x] * 1_000
+ reward_poc = (1 / (self.timestep)) * prob_matrix[new_y, new_x] * 1_000
rewards[agent] = self.reward_scheme.search_cell + reward_poc
self.seen_states.add(new_position)
self.not_seen_states.remove(new_position)
Expand Down
14 changes: 6 additions & 8 deletions DSSE/environment/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ class DroneSwarmSearch(DroneSwarmSearchBase):
}

reward_scheme = Reward(
- default=1,
- leave_grid=-100_000,
- exceed_timestep=-100_000,
- drones_collision=-100_000,
+ default=0.1,
+ leave_grid=-200,
+ exceed_timestep=-200,
+ drones_collision=-200,
search_cell=1,
- search_and_find=100_000,
+ search_and_find=200,
)

def __init__(
Expand Down Expand Up @@ -262,9 +262,7 @@ def step(self, actions):
elif is_searching:
prob_matrix = self.probability_matrix.get_matrix()
rewards[agent] = (
- prob_matrix[drone_y][drone_x] * 10000
- if prob_matrix[drone_y][drone_x] * 100 > 1
- else -100
+ prob_matrix[drone_y][drone_x]
)

self.rewards_sum[agent] += rewards[agent]
Expand Down
2 changes: 1 addition & 1 deletion DSSE/tests/test_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def test_leave_grid_get_negative_reward(grid_size, person_initial_position):

done = False
reward_sum = 0
- while not done and reward_sum >= -500_000:
+ while not done and reward_sum >= env.reward_scheme.leave_grid * (env.timestep_limit - 1):
actions = {"drone0": Actions.UP.value}
_, reward, terminations, done, _ = env.step(actions)
done = any(done.values())
Expand Down
2 changes: 1 addition & 1 deletion DSSE/tests/test_env_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def test_leave_grid_get_negative_reward(grid_size):

done = False
reward_sum = 0
- while not done and reward_sum >= -9_000:
+ while not done and reward_sum >= (env.reward_scheme.leave_grid * (env.timestep_limit-1)) +1:
actions = {"drone0": Actions.UP.value}
_, reward, terminations, done, _ = env.step(actions)
done = any(done.values())
Expand Down

0 comments on commit 78176bb

Please sign in to comment.