I'm trying to learn planning, i have a half baked program and I'm stuck. I don't know whats wrong.
Well, here's an overview: I have an Agent who have goals and actions, doing an action will make the value of a goal lower. Each Action cost a energy.
Actions and Goals object are created and stored in the Agent Object.
from copy import deepcopy
class Action(object):
def __init__(self, name, goals=None, costs=None):
self.name = name
#{} = empty dictionary
self.goals = goals if goals else {}
self.costs = costs if costs else {}
#intialise time
if 'time' not in self.costs:
self.costs['time'] = 0
def __deepcopy__(self, memo):
# support deep copy
return Action(self.name, deepcopy(self.goals), deepcopy(self.costs))
def __str__(self):
return '%s, %s, Energy: %s' % (self.name, str(self.goals), str(self.costs.get('Energy')))
class Goal(object):
def __init__(self, name, value=0, rate=0.0):
self.name = name
self.value = value
self.rate = rate
def update(self, time):
# update goal value by rate using time
self.value += self.rate * time
def _deepcopy__(self, memo):
#supports deep copy
return Goal(self.name, self.value, self.rate)
def apply_action(self, action):
self.update(action.costs['time'])
if self.name in action.goals:
self.value = max(self.value + action.goals[self.name], 0)
def discontentment(self):
return self.value**2
def __str__(self):
#pretty string format
return 'Name: %s, Value %s, Rate: %s' % (self.name, str(self.value), str(self.rate))
class WorldState(object):
id = 0
def __init__(self, goals, actions, costs):
self.goals = deepcopy(goals)
self.actions = deepcopy(actions)
self.costs = deepcopy(costs)
self.reset_next_action()
WorldState.id += 1
self.id = WorldState.id
def __deepcopy__(self, memo):
# support deepcopy
return WorldState(self.goals, self.actions, self.costs)
def reset_next_action(self):
self.current_actions = self.valid_actions()
self.valid_action_len = len(self.current_actions)
def apply_action(self, action, goals=None, costs=None):
#use agent self.goals/costs if none provided
goals = goals if goals else self.goals
costs = costs if costs else self.costs
#go through all goals of the action influences and change
#apply this action to all goals
for name, goal in goals.items():
goal.apply_action(action)
for cost, change in self.actions[action.name].costs.items():
costs[cost] = costs[cost] + change
return goals, costs
def apply_action_reset(self, action):
self.apply_action(action)
self.reset_next_action()
def try_action(self, action):
goals = deepcopy(self.goals)
costs = deepcopy(self.costs)
return self.apply_action(action, goals, costs)
def valid_actions(self):
actions = []
for name, action in self.actions.items():
goals, costs = self.try_action(action)
if all( [value >= 0 for key, value in costs.items() ]):
actions.append(action)
return actions
def next_action(self):
return self.current_actions.pop() if self.current_actions else None
def discontentment(self):
return sum([goal.discontentment() for name, goal in self.goals.items()])
def __str__(self):
goals = ', '.join(([ '%s %d' % (v.name, v.value) for k,v in self.goals.items() ]))
costs = str(self.costs)
actions = ', '.join([a.name for a in self.valid_actions() ])
next = '%d:%d' % (self.valid_action_len, len(self._current_actions))
return 'id: %d, goals: {%s}, costs: %s, actions: {%s}, next: %s' % (self.id, goals, costs, actions, next)
#==============================================================================
class Agent(object):
def __init__(self, goals=None, actions=None, costs=None):
self.goals = deepcopy(goals) if goals else {}
self.actions = deepcopy(actions) if actions else {}
self.costs = deepcopy(costs) if costs else {}
if 'time' not in self.costs:
self.costs['time'] = 0
def apply_action(self, action, goals=None, costs=None):
goals = goals if goals else self.goals
costs = costs if costs else self.costs
for name, goal in goals.items():
goal.apply_action(action)
for cost, change in self.actions[action.name].costs.items():
costs[cost] = costs[cost] + change
return goals, costs
def overall_discontentment(self, goals=None, costs=None):
#calculates overall total discontentment
goals = goals if goals else self.goals
costs = costs if costs else self.costs
return sum([goal.discontentment() for name, goal in goals.items()])
def valid_actions(self):
for name, action in self.actions.items():
goals, costs = self.apply_action(action)
if all( [value >= 0 for key, value in costs.items() ]):
self.actions.append(action)
return self.actions
def action_discontentment(self, action):
goals, costs = self.try_action(action)
return self.overall_discontentment(goals, costs)
def choose_action_goap(self, max_depth):
#storage for world state and the actions used
states = [[ WorldState(self.goals, self.actions, self.costs), Action('Base')]]
#keep track of current best actions
best_action = None
best_value = 1000000
best_plan = []
verbose = True
if verbose:
print('Searching...')
changed = True
while states:
current_value = states[-1][0].discontentment()
if verbose and changed:
changed = False
level = len(states) - 1
for i, state in enumerate(states[level:], start=level):
print(' '*i, '+->[', state[1].name,'] (', str(current_value) ,')')
if len(states) >= max_depth:
if current_value < best_value:
best_action = states[1][1]
best_value = current_value
best_plan = [state[1].name for state in states if state[1]] + [best_value]
states.pop()
continue
next_action = states[-1][0].next_action()
if next_action:
new_state = deepcopy(states[-1][0])
state.append([new_state, None])
states[-1][1] = next_action
new_state.apply_action_reset(next_action)
changed = True
else:
states.pop()
return best_action, best_plan
def print_actions(agent):
print ('ACTIONS:')
for k, v in agent.actions.items():
print (" - ", v)
def print_action_evals(agent):
print ('VALID ACTIONS (DISCONTENTMENT)')
for action in agent.valid_actions():
print ("- [%s] (%d " % (action.name, agent.action_discontentment(action)))
def print_goals(agent):
print ('GOALS:')
for k, v in agent.goals.items():
print (' - ', v)
def run_drone_attack_goap(agent, depth=4):
HR = '-'*60
print_actions(agent)
print('>> Start <<')
while agent.costs['Energy'] > 0:
print(HR)
energy = agent.costs['Energy']
print ('Current Goals (Discontentment= %d) ' % agent.overall_discontentment())
print (' - ', ', '.join(['%s=%s' % (v.name, str(v.value)) for k, v in agent.goals.items()]))
print (('Current Costs'), str(agent.costs))
action, plan = agent.choose_action_goap(depth)
print ('Best Action: \n => [%s] (%d)' % (action.name, agent.action_discontentment(action)))
agent.apply_action(action)
print ('New Goals (Discontentment=%d)' % agent.overall_discontentment())
print (' - ' ', '.join('%s=%s' % (v.name, str(v.value)) for k, v in agent.goals.items()))
depth -= 1
print (HR)
print ('Energy depleted...victory!')
print (HR)
print ('>> Done! <<\n\n')
#==============================================================================
def laser_shield_energy_example():
#list that is then converted to dictionary
goals = {
Goal('Attack', 3, 1),
Goal('Shield', 4, 0),
}
goals = {g.name: g for g in goals}
actions = {
Action('Attack with Laser', goals={'Attack': -2}, costs={'Energy': -3}),
Action('Half Shield Recharge', goals={'Attack': -2}, costs={'Energy': -2}),
Action('Full Shield Recharge', goals={'Shield': -4}, costs={'Energy': -3}),
}
actions = dict([(a.name, a) for a in actions])
costs = {'Energy': 5}
drone = Agent(goals, actions, costs)
run_drone_attack_goap(drone)
if __name__ == '__main__':
laser_shield_energy_example()