# Source code for gym_nethack.policies.combat

import random

from gym_nethack.policies.core import Policy
from gym_nethack.nhdata import *

class ApproachAttackPolicy(Policy):
    """Heuristic policy for NetHack combat: equip, then approach and attack.

    On the first step of an episode a weapon action is chosen at random. If
    ``equip_armor`` is set, the following steps equip randomly chosen armor
    (at most five pieces). Afterwards the policy attacks the monster at close
    range, approaching it when it is not yet in range. If a ranged weapon was
    chosen, it fires a projectile from a distance instead whenever one is
    available.
    """

    def __init__(self):
        """Initialize the policy."""
        # Armor action indices already tried during the current episode.
        self.tried_armor_indices = []
        # Placeholder; overwritten on the first step of every episode.
        self.weapon_choice = 50

    def set_config(self, equip_armor=False):
        """Set policy parameters.

        Args:
            equip_armor: whether to randomly choose a piece of armor and equip
                it, to a maximum of five pieces of armor, before starting to
                approach and attack the monster.
        """
        self.equip_armor = equip_armor
        self.name = 'appatkWA' if equip_armor else 'appatkW'

    def select_action(self, q_values, valid_action_indices):
        """Return the action corresponding to the heuristic policy.

        Args:
            q_values: list of q-values, one per action (not used by this
                heuristic).
            valid_action_indices: indices of legal actions (corresponding to
                the abilities list).

        Returns:
            The index of the chosen action.
        """
        abilities = self.agent.env.abilities

        if self.agent.episode_step == 0:
            # New episode: forget the armor tried previously and pick a weapon.
            self.tried_armor_indices = []
            # Indices 0-8 are skipped when scanning for items; action 4 is
            # always offered as an extra weapon option.
            # NOTE(review): action 4 is presumably "fight without equipping a
            # weapon" - confirm against the environment's action list.
            valid_weapons = [
                i for i in valid_action_indices
                if i not in range(9) and abilities[i].type in ['melee', 'ranged']
            ] + [4]
            self.weapon_choice = random.choice(valid_weapons)
            return self.weapon_choice

        if self.equip_armor and len(self.tried_armor_indices) < 5:
            valid_armor = [
                i for i in valid_action_indices
                if i not in range(9)
                and abilities[i].type in ARMOR_TYPES
                and i not in self.tried_armor_indices
            ]
            # random.choice() raises IndexError on an empty list, so only
            # equip when some untried armor is left; otherwise fall through
            # to the approach/attack logic below.
            if valid_armor:
                chosen_armor = random.choice(valid_armor)
                self.tried_armor_indices.append(chosen_armor)
                # Try to equip armors in succession before beginning
                # approach/attack.
                return chosen_armor

        # The isinstance() check guards the .type access: the chosen weapon
        # may be action 4, whose abilities entry is not known to have a
        # .type attribute.
        chosen_weapon = abilities[self.weapon_choice]
        if isinstance(chosen_weapon, tuple) and chosen_weapon.type == 'ranged':
            # Ranged weapon equipped, so try to shoot a projectile.
            projectile_actions = [
                i for i in valid_action_indices
                if i not in range(9) and abilities[i].type == 'projectile'
            ]
            if projectile_actions:
                return random.choice(projectile_actions)

        # Otherwise, do a melee attack if possible (i.e., if in range).
        if 1 in valid_action_indices:
            return 1  # attack if possible
        if 2 in valid_action_indices:
            return 2  # otherwise line up (closer)
        if 0 in valid_action_indices:
            return 0  # regular approach
        return 5  # wait if monster invis.

class ApproachAttackItemPolicy(Policy):
    """Heuristic NetHack combat policy that also uses random items.

    On the first step of an episode a weapon action is chosen at random. If
    ``equip_armor`` is set, the following steps equip every available piece
    of armor once, in random order. After that, each step prefers attacking
    with probability 0.75 (firing a projectile if a ranged weapon was chosen,
    otherwise attacking at close range or approaching) and prefers using a
    random potion/scroll/wand/ring with probability 0.25. Whichever is
    preferred, the other is used as a fallback when the preferred option is
    unavailable.
    """

    def __init__(self):
        """Initialize the policy."""
        # Armor action indices already tried during the current episode.
        self.tried_armor_indices = []
        # Placeholder; overwritten on the first step of every episode.
        self.weapon_choice = 50

    def set_config(self, equip_armor=False):
        """Set policy parameters.

        Args:
            equip_armor: whether to randomly choose pieces of armor and equip
                them before starting to attack or use items. Each available
                piece is tried once per episode; no upper limit on the number
                of pieces is enforced by this policy.
        """
        self.equip_armor = equip_armor
        # NOTE(review): the two names do not follow a common pattern (only
        # one contains "item"); kept byte-identical because the name may be
        # used elsewhere, e.g. in result paths - confirm before changing.
        self.name = 'appatkitemWASPWa' if equip_armor else 'appatkWPSW'

    def _indices_of_type(self, valid_action_indices, types):
        """Return the valid item action indices whose ability type is in ``types``."""
        abilities = self.agent.env.abilities
        # Indices 0-8 are skipped: only higher indices are treated as items.
        return [
            i for i in valid_action_indices
            if i not in range(9) and abilities[i].type in types
        ]

    def _projectile_action(self, valid_action_indices):
        """Return a random projectile action if a ranged weapon was chosen, else None."""
        chosen_weapon = self.agent.env.abilities[self.weapon_choice]
        # The isinstance() check guards the .type access: the chosen weapon
        # may be action 4, whose abilities entry is not known to have a
        # .type attribute.
        if isinstance(chosen_weapon, tuple) and chosen_weapon.type == 'ranged':
            projectile_actions = self._indices_of_type(valid_action_indices, ['projectile'])
            if projectile_actions:
                return random.choice(projectile_actions)
        return None

    def _melee_or_approach_action(self, valid_action_indices):
        """Return attack (1), line up closer (2) or approach (0), in that order of preference, else None."""
        for action in (1, 2, 0):
            if action in valid_action_indices:
                return action
        return None

    def _item_action(self, valid_action_indices):
        """Return a random usable potion/scroll/wand/ring action, else None."""
        usable_item_indices = self._indices_of_type(
            valid_action_indices, ['potion', 'scroll', 'wand', 'ring'])
        # The former extra condition `max(valid_action_indices) > 9` was
        # dropped: it was redundant except when the only usable item sat at
        # index 9, in which case that item was wrongly ignored.
        if usable_item_indices:
            return random.choice(usable_item_indices)
        return None

    def select_action(self, q_values, valid_action_indices):
        """Return the action corresponding to the heuristic policy.

        Args:
            q_values: list of q-values, one per action (not used by this
                heuristic).
            valid_action_indices: indices of legal actions (corresponding to
                the abilities list); must not be empty.

        Returns:
            The index of the chosen action.
        """
        assert len(valid_action_indices) > 0

        if self.agent.episode_step == 0:
            # New episode: forget the armor tried previously and pick a weapon.
            self.tried_armor_indices = []
            # Action 4 is always offered as an extra weapon option.
            # NOTE(review): action 4 is presumably "fight without equipping a
            # weapon" - confirm against the environment's action list.
            valid_weapons = self._indices_of_type(valid_action_indices, ['melee', 'ranged']) + [4]
            self.weapon_choice = random.choice(valid_weapons)
            return self.weapon_choice

        if self.equip_armor:
            valid_armor = [
                i for i in self._indices_of_type(valid_action_indices, ARMOR_TYPES)
                if i not in self.tried_armor_indices
            ]
            if valid_armor:
                chosen_armor = random.choice(valid_armor)
                self.tried_armor_indices.append(chosen_armor)
                # Try to equip armors in succession before beginning
                # approach/attack.
                return chosen_armor

        if random.random() < 0.75:
            # Try to attack first; fall back to a random item.
            strategies = (
                self._projectile_action,
                self._melee_or_approach_action,
                self._item_action,
            )
        else:
            # Try to use a random item first; fall back to attacking.
            strategies = (
                self._item_action,
                self._projectile_action,
                self._melee_or_approach_action,
            )

        for strategy in strategies:
            action = strategy(valid_action_indices)
            # Compare against None explicitly: action 0 (approach) is falsy.
            if action is not None:
                return action
        return 6  # nothing else available, so do random move.

class FireAntPolicy(Policy):
    """Heuristic policy for fire ant, as described in the author's thesis.

    The policy proceeds in three stages: equip the tsurugi, use the wand of
    cancellation once (moving with the "line up (farther)" action until the
    wand action becomes valid), then approach the monster and attack it.
    """

    def __init__(self):
        """Initialize the policy."""
        super().__init__(name='fAntP')
        self.new_episode()

    def new_episode(self):
        """Start a new episode, resetting policy state."""
        self.used_wand_of_cancellation = False
        self.equipped_tsurugi = False

    def _find_item_action(self, valid_action_indices, full_name):
        """Return the first valid item action whose full name is ``full_name``, else None."""
        abilities = self.agent.env.abilities
        for i in valid_action_indices:
            # Indices 0-8 are skipped: only higher indices are treated as items.
            if i > 8 and abilities[i].full_name == full_name:
                return i
        return None

    def select_action(self, q_values, valid_action_indices):
        """Return the action corresponding to the heuristic policy.

        Args:
            q_values: list of q-values, one per action (not used by this
                heuristic).
            valid_action_indices: indices of legal actions (corresponding to
                the abilities list).

        Returns:
            The index of the chosen action.

        Raises:
            AssertionError: if the tsurugi has not been equipped yet and no
                valid action for it exists.
        """
        if self.agent.episode_step == 0:
            self.new_episode()

        if not self.equipped_tsurugi:
            tsurugi_action = self._find_item_action(valid_action_indices, 'uncursed +0 tsurugi')
            if tsurugi_action is None:
                # Raised explicitly rather than via `assert False`, which is
                # stripped under `python -O` and would let execution fall
                # through to the wand stage without a weapon equipped.
                raise AssertionError(
                    "no valid action found for 'uncursed +0 tsurugi'")
            self.equipped_tsurugi = True
            return tsurugi_action

        if not self.used_wand_of_cancellation:
            # Check if we are lined up: the wand action is looked for among
            # the valid actions.
            wand_action = self._find_item_action(
                valid_action_indices, 'uncursed wand of cancellation')
            if wand_action is not None:
                self.used_wand_of_cancellation = True
                return wand_action
            # Probably not lined up.
            return 3  # line up (farther) action

        if 1 not in valid_action_indices:
            return 0  # approach monster
        return 1  # attack when in range.
# Source code for gym_nethack.policies.combat
#
# gym_nethack
#
# Navigation
#
# Related Topics