Source code for gym_nethack.policies.combat

import random

from gym_nethack.policies.core import Policy
from gym_nethack.nhdata import *

class ApproachAttackPolicy(Policy):
    """Heuristic NetHack combat policy: equip, then approach and attack.

    Randomly equips a weapon (and armor, if specified), then approaches the
    monster and attacks it at close range. (If ranged weapon equipped, it
    will attack from a distance instead of approaching.)
    """

    def __init__(self):
        """Initialize the policy."""
        # Armor actions already attempted during the current episode.
        self.tried_armor_indices = []
        # Placeholder; replaced by a random weapon action on the first step
        # of every episode (see select_action).
        self.weapon_choice = 50

    def set_config(self, equip_armor=False):
        """Set policy parameters.

        Args:
            equip_armor: whether to randomly choose a piece of armor and
                equip it, to a maximum of five pieces of armor, before
                starting to approach and attack the monster.
        """
        self.equip_armor = equip_armor
        self.name = 'appatkWA' if equip_armor else 'appatkW'

    def select_action(self, q_values, valid_action_indices):
        """Return the action corresponding to the heuristic policy.

        Args:
            q_values: list of q-values, one per action (not used by this
                heuristic).
            valid_action_indices: indices of legal actions (corresponding to
                the abilities list).

        Returns:
            The index of the chosen action.
        """
        abilities = self.agent.env.abilities

        if self.agent.episode_step == 0:
            # First step of the episode: reset state and pick a weapon.
            self.tried_armor_indices = []
            # Indices 0-8 are skipped when scanning for items. Action 4 is
            # always added as a candidate, so the list is never empty.
            # NOTE(review): 4 is presumably the "no weapon" action; confirm
            # against env.abilities.
            valid_weapons = [
                i for i in valid_action_indices
                if i not in range(9) and abilities[i].type in ['melee', 'ranged']
            ] + [4]
            self.weapon_choice = random.choice(valid_weapons)
            return self.weapon_choice

        if self.equip_armor and len(self.tried_armor_indices) < 5:
            valid_armor = [
                i for i in valid_action_indices
                if i not in range(9)
                and abilities[i].type in ARMOR_TYPES
                and i not in self.tried_armor_indices
            ]
            # Guard against an empty list: random.choice([]) raises
            # IndexError. With no untried armor left, fall through to the
            # approach/attack logic instead of crashing.
            if valid_armor:
                chosen_armor = random.choice(valid_armor)
                self.tried_armor_indices.append(chosen_armor)
                # try to equip all armors in succession before beginning
                # approach/attack.
                return chosen_armor

        equipped = abilities[self.weapon_choice]
        # Only tuple entries are inspected for a `.type`.
        # NOTE(review): presumably item abilities are namedtuples while the
        # basic actions are not; confirm against env.abilities.
        if isinstance(equipped, tuple) and equipped.type == 'ranged':
            # ranged weapon equipped, so try to shoot projectile.
            projectile_actions = [
                i for i in valid_action_indices
                if i not in range(9) and abilities[i].type == 'projectile'
            ]
            if projectile_actions:
                return random.choice(projectile_actions)

        # otherwise, do a melee attack if possible (i.e., if in range)
        if 1 in valid_action_indices:
            return 1  # attack if possible
        if 2 in valid_action_indices:
            return 2  # otherwise line up (closer)
        if 0 in valid_action_indices:
            return 0  # regular approach
        return 5  # wait if monster invis.
class ApproachAttackItemPolicy(Policy):
    """Heuristic NetHack combat policy mixing attacks with random item use.

    Randomly equips a weapon (and armor, if specified). Afterwards, on each
    step, it prefers approaching/attacking the monster with probability 0.75
    and prefers using a random item with probability 0.25, falling back to
    the other option when the preferred one is unavailable. (If ranged weapon
    equipped, it will attack from a distance instead of approaching.)
    """

    def __init__(self):
        """Initialize the policy."""
        self.tried_armor_indices = []
        # Placeholder until the first step of an episode picks a weapon.
        self.weapon_choice = 50

    def set_config(self, equip_armor=False):
        """Set policy parameters.

        Args:
            equip_armor: whether to randomly choose pieces of armor and equip
                them before starting to approach and attack the monster.
                This class keeps equipping until no untried armor action is
                valid; no explicit cap on the number of pieces is enforced.
        """
        self.equip_armor = equip_armor
        if equip_armor:
            self.name = 'appatkitemWASPWa'
        else:
            self.name = 'appatkWPSW'

    def _attack_action(self, valid_action_indices):
        """Return a projectile/attack/approach action, or None if none is valid."""
        abilities = self.agent.env.abilities
        wielded = abilities[self.weapon_choice]
        if isinstance(wielded, tuple) and wielded.type == 'ranged':
            # Ranged weapon equipped, so try to shoot a projectile.
            shots = [
                idx for idx in valid_action_indices
                if idx not in range(9) and abilities[idx].type == 'projectile'
            ]
            if shots:
                return random.choice(shots)

        # Otherwise melee if in range, else move towards the monster.
        for basic in (1, 2, 0):  # attack, line up (closer), regular approach
            if basic in valid_action_indices:
                return basic
        return None

    def _item_action(self, valid_action_indices):
        """Return a random usable-item action, or None if none qualifies."""
        abilities = self.agent.env.abilities
        usable = [
            idx for idx in valid_action_indices
            if idx not in range(9)
            and abilities[idx].type in ['potion', 'scroll', 'wand', 'ring']
        ]
        # NOTE(review): the `> 9` test rejects the case where the only valid
        # item sits at index 9; possibly an off-by-one, kept as in the
        # original. Confirm intent before changing.
        if max(valid_action_indices) > 9 and len(usable) > 0:
            return random.choice(usable)
        return None

    def select_action(self, q_values, valid_action_indices):
        """Return the action corresponding to the heuristic policy.

        Args:
            q_values: list of q-values, one per action (not used by this
                heuristic).
            valid_action_indices: indices of legal actions (corresponding to
                the abilities list); must be non-empty.

        Returns:
            The index of the chosen action.
        """
        assert len(valid_action_indices) > 0
        abilities = self.agent.env.abilities

        # First step of the episode: reset state and pick a weapon. Action 4
        # is always appended as a candidate.
        if self.agent.episode_step == 0:
            self.tried_armor_indices = []
            weapon_candidates = [
                idx for idx in valid_action_indices
                if idx not in range(9) and abilities[idx].type in ['melee', 'ranged']
            ]
            weapon_candidates = weapon_candidates + [4]
            self.weapon_choice = random.choice(weapon_candidates)
            return self.weapon_choice

        # Equip untried armor pieces one per step before fighting.
        if self.equip_armor:
            armor_candidates = [
                idx for idx in valid_action_indices
                if idx not in range(9)
                and abilities[idx].type in ARMOR_TYPES
                and idx not in self.tried_armor_indices
            ]
            if armor_candidates:
                picked = random.choice(armor_candidates)
                self.tried_armor_indices.append(picked)
                return picked

        if random.random() < 0.75:
            # Try to attack first; use an item only if no attack is possible.
            action = self._attack_action(valid_action_indices)
            if action is None:
                action = self._item_action(valid_action_indices)
        else:
            # Try to use a random item first; otherwise attack.
            action = self._item_action(valid_action_indices)
            if action is None:
                action = self._attack_action(valid_action_indices)

        if action is None:
            return 6  # nothing else available, so do random move.
        return action
class FireAntPolicy(Policy):
    """Heuristic policy for fire ant, as described in my thesis.

    Sequence per episode: equip the tsurugi, use the wand of cancellation
    once (lining up first if the wand action is not yet valid), then approach
    and attack.
    """

    def __init__(self):
        """Initialize the policy."""
        super().__init__(name='fAntP')
        self.new_episode()

    def new_episode(self):
        """Start a new episode, resetting policy state."""
        self.used_wand_of_cancellation = False
        self.equipped_tsurugi = False

    def _find_item(self, valid_action_indices, full_name):
        """Return the first valid item action (index > 8) with this full name, or None."""
        abilities = self.agent.env.abilities
        return next(
            (i for i in valid_action_indices
             if i > 8 and abilities[i].full_name == full_name),
            None,
        )

    def select_action(self, q_values, valid_action_indices):
        """Return the action corresponding to the heuristic policy.

        Args:
            q_values: list of q-values, one per action (not used by this
                heuristic).
            valid_action_indices: indices of legal actions (corresponding to
                the abilities list).

        Returns:
            The index of the chosen action.

        Raises:
            AssertionError: if the tsurugi has not been equipped yet and no
                valid action corresponds to it.
        """
        if self.agent.episode_step == 0:
            self.new_episode()

        if not self.equipped_tsurugi:
            tsurugi = self._find_item(valid_action_indices, 'uncursed +0 tsurugi')
            if tsurugi is None:
                # Raised explicitly rather than via `assert False`, so the
                # check still fires when Python runs with -O.
                raise AssertionError(
                    "FireAntPolicy: no valid action for 'uncursed +0 tsurugi'")
            self.equipped_tsurugi = True
            return tsurugi

        if not self.used_wand_of_cancellation:
            # check if we are lined up.
            # NOTE(review): presumably the wand action is only valid when
            # lined up with the monster; confirm against the environment.
            wand = self._find_item(valid_action_indices,
                                   'uncursed wand of cancellation')
            if wand is not None:
                self.used_wand_of_cancellation = True
                return wand
            # probably not lined up.
            return 3  # line up (farther) action

        if 1 not in valid_action_indices:
            return 0  # approach monster
        return 1  # attack when in range.