Source code for machine_common_sense.step_metadata

import copy

from .goal_metadata import GoalMetadata
from .return_status import ReturnStatus
from .stringifier import Stringifier

class StepMetadata:
    """
    Defines output metadata from an action step in the MCS 3D environment.

    Attributes
    ----------
    action_list : list of (string, dict) tuples
        The list of all actions that are available for the next step.
        Each action is returned as a tuple containing the action string and
        the action's restricted parameters, if any. For example:
        ("Pass", {}) forces a Pass action; ("PickupObject", {}) forces a
        PickupObject action with any parameters; and
        ("PickupObject", {"objectId": "a"}) forces a PickupObject action
        with the specific parameters objectId=a.
        EndHabituation is a special case of the action_list where its
        parameters will always be empty. When taking the EndHabituation
        action, the MCS system may apply hidden displacement parameters to
        the robot.
        An action_list of None or an empty list means that all actions will
        be available for the next step.

        To "step" using the first action from the action_list:

        .. highlight:: python
        .. code-block:: python

            step_metadata = controller.start_scene(scene_data)
            action, params = step_metadata.action_list[0]
            step_metadata = controller.step(action, **params)

        Derived from
        :mod:`GoalMetadata.action_list[step_number] <machine_common_sense.GoalMetadata>`.
        May be a subset of all possible actions. See
        :mod:`Action <machine_common_sense.Action>`.
    camera_aspect_ratio : (float, float)
        The player camera's aspect ratio. This will remain constant for the
        whole scene.
    camera_clipping_planes : (float, float)
        The player camera's near and far clipping planes, in meters. This
        will remain constant for the whole scene. Default (0.01, 150)
    camera_field_of_view : float
        The player camera's field of view. This will remain constant for
        the whole scene.
    camera_height : float
        The player camera's height, in meters.
    depth_map_list : list of 2D numpy arrays
        The list of 2-dimensional numpy arrays of depth float data from the
        scene after the last action and physics simulation were run.
        This is usually a list with 1 array, except for the output from
        start_scene for a scene with a scripted Preview Phase (Preview Phase
        case details TBD).
        Each 32-bit depth float in the 2-dimensional numpy array is a value
        between the camera's near clipping plane (default 0.01) and the
        camera's far clipping plane (default 150) corresponding to the
        depth, in meters, at that pixel in the image.
        Note that this list will be empty if the metadata level is 'none'.
    goal : GoalMetadata or None
        The goal for the whole scene. Will be None in "Exploration" scenes.
    haptic_feedback : dict
        Haptic feedback sources for the agent. Values are true or false
        depending on if the agent is touching the haptic feedback source.
        The only current supported contact is "on_lava"
    habituation_trial : int or None
        The current habituation trial (as a positive integer), or None if
        the scene is not currently in a habituation trial (meaning this
        scene is in a test trial).
    head_tilt : float
        How far your head is tilted up/down in degrees (between 90 and
        -90). Changed by setting the "horizon" parameter in a "RotateLook"
        action.
    holes : list of tuples
        Coordinates of holes as (X, Z) float tuples. Will be set to 'None'
        if using a metadata level below the 'oracle' level.
    image_list : list of Pillow.Image objects
        The list of images from the scene after the last action and physics
        simulation were run. This is usually a list with 1 image, except
        for the output from start_scene for a scene with a scripted Preview
        Phase. (Preview Phase case details TBD).
    lava : list of tuples
        Coordinates of pools of lava as (X1, Z1, X2, Z2) float tuples,
        where X1/Z1 is the top-left corner and X2/Z2 is the bottom-right
        corner. Will be set to 'None' if using a metadata level below the
        'oracle' level.
    object_list : list of ObjectMetadata objects
        The list of metadata for all the visible interactive objects in the
        scene. This list will be empty if using a metadata level below the
        'oracle' level.
        For metadata on structural objects like walls, please see
        structural_object_list
    object_mask_list : list of Pillow.Image objects
        The list of object mask (instance segmentation) images from the
        scene after the last action and physics simulation were run. This
        is usually a list with 1 image, except for the output from
        start_scene for a scene with a scripted Preview Phase (Preview Phase
        case details TBD).
        The color of each object in the mask corresponds to the "color"
        property in its ObjectMetadata object.
        Note that this list will be empty if the metadata level is 'none'
        or 'level1'.
    performer_radius: float
        The radius of the performer, in meters.
    performer_reach: float
        The max reach of the performer, in meters.
    physics_frames_per_second : float
        The frames per second of the physics engine
    position : dict
        The "x", "y", and "z" coordinates for your global position.
        Will be set to 'None' if using a metadata level below the 'oracle'
        level.
    resolved_object : string
        The object that was selected based on objectImageCoords
    resolved_receptacle : string
        The receptacle that was selected based on
        receptacleObjectImageCoords
    return_status : string
        The return status from your last action. See
        :mod:`Action <machine_common_sense.Action>`.
    reward : integer
        Reward is 1 on successful completion of a task, 0 otherwise.
    room_dimensions : dict
        The "x", "y", and "z" dimensions of the current scene.
        Will be set to 'None' if using a metadata level below the 'oracle'
        level.
    rotation : float
        Your current rotation angle in degrees.
        Will be set to 'None' if using a metadata level below the 'oracle'
        level.
    segmentation_colors : list of dicts
        The colors for all objects in the instance segmentation images (in
        `object_mask_list`), each represented as a dict containing an
        "objectId" string property and "r", "g", and "b" int properties for
        the corresponding red, green, and blue values.
        The ceiling has an objectId of "ceiling"; exterior room walls have
        objectIds of "wall_back", "wall_front", "wall_left", and
        "wall_right"; floor sections have objectIds starting with "floor "
        and then the texture name (since different areas of the floor can
        have different textures); holes have objectIds of "hole"; hole
        walls have objectIds of "hole wall"; and lava areas have objectIds
        of "lava".
        Will be empty if using a metadata level below the 'oracle' level.
    step_number : integer
        The step number of your last action, recorded since you started the
        current scene.
    steps_on_lava : integer
        The number of steps the agent has touched lava
    structural_object_list : list of ObjectMetadata objects
        The list of metadata for all the visible structural objects (like
        walls, occluders, and ramps) in the scene. This list will be empty
        if using a metadata level below the 'oracle' level.
        Occluders are composed of two separate objects, the "wall" and the
        "pole", with corresponding object IDs (occluder_wall_<uuid> and
        occluder_pole_<uuid>), and ramps are composed of between one and
        three objects (depending on the type of ramp), with corresponding
        object IDs.
    triggered_by_sequence_incorrect : bool
        If the sequence to trigger a placer holding the target is incorrect
    """

    def __init__(
        self,
        action_list=None,
        camera_aspect_ratio=None,
        camera_clipping_planes=None,
        camera_field_of_view=0.0,
        camera_height=0.0,
        depth_map_list=None,
        goal=None,
        habituation_trial=None,
        haptic_feedback=None,
        head_tilt=0.0,
        holes=None,
        image_list=None,
        lava=None,
        object_list=None,
        object_mask_list=None,
        performer_radius=0.0,
        performer_reach=0.0,
        physics_frames_per_second=0,
        position=None,
        resolved_object='',
        resolved_receptacle='',
        return_status=ReturnStatus.UNDEFINED.value,
        reward=0,
        room_dimensions=None,
        rotation=0.0,
        segmentation_colors=None,
        step_number=0,
        steps_on_lava=0,
        structural_object_list=None,
        triggered_by_sequence_incorrect=False
    ):
        """Store each argument on the instance under the same name.

        Container-typed arguments default to None in the signature so that
        no mutable default is shared between instances; a None argument is
        replaced here with a fresh empty list/dict, a (0.0, 0.0) tuple for
        the two camera tuples, or a new default GoalMetadata for goal.
        """
        self.action_list = [] if action_list is None else action_list
        self.camera_aspect_ratio = (
            (0.0, 0.0) if camera_aspect_ratio is None
            else camera_aspect_ratio
        )
        self.camera_clipping_planes = (
            (0.0, 0.0) if camera_clipping_planes is None
            else camera_clipping_planes
        )
        self.camera_field_of_view = camera_field_of_view
        self.camera_height = camera_height
        self.depth_map_list = (
            [] if depth_map_list is None else depth_map_list
        )
        self.goal = GoalMetadata() if goal is None else goal
        self.habituation_trial = habituation_trial
        self.haptic_feedback = (
            {} if haptic_feedback is None else haptic_feedback
        )
        self.head_tilt = head_tilt
        self.holes = [] if holes is None else holes
        self.image_list = [] if image_list is None else image_list
        self.lava = [] if lava is None else lava
        self.object_list = [] if object_list is None else object_list
        self.object_mask_list = (
            [] if object_mask_list is None else object_mask_list
        )
        self.performer_radius = performer_radius
        self.performer_reach = performer_reach
        self.physics_frames_per_second = physics_frames_per_second
        self.position = {} if position is None else position
        self.resolved_object = resolved_object
        self.resolved_receptacle = resolved_receptacle
        self.return_status = return_status
        self.reward = reward
        self.room_dimensions = (
            {} if room_dimensions is None else room_dimensions
        )
        self.rotation = rotation
        self.segmentation_colors = (
            [] if segmentation_colors is None else segmentation_colors
        )
        self.step_number = step_number
        self.steps_on_lava = steps_on_lava
        self.structural_object_list = (
            [] if structural_object_list is None
            else structural_object_list
        )
        self.triggered_by_sequence_incorrect = triggered_by_sequence_incorrect

    def __str__(self):
        """Return the string built by Stringifier.class_to_str."""
        return Stringifier.class_to_str(self)

    def check_list_none(self, obj_list):
        """Map each object in obj_list to a dict, keyed by its uuid.

        Returns None when obj_list is None; otherwise returns a dict of
        ``{obj.uuid: dict(obj)}`` for every object in the list.
        """
        if obj_list is None:
            return None
        return {obj.uuid: dict(obj) for obj in obj_list}

    def copy_without_depth_or_images(self):
        """Return a deep copy of this StepMetadata with default
        depth_map_list, image_list, and object_mask_list properties."""
        step_metadata_copy = StepMetadata()
        # This class's __iter__ function will ignore specific properties.
        # Only the yielded keys are used; each value is re-read from the
        # attribute so the copy holds the original objects' deep copies
        # rather than the dict conversions that __iter__ produces.
        for key, _ in self:
            setattr(step_metadata_copy, key, copy.deepcopy(getattr(self, key)))
        return step_metadata_copy

    # Allows converting the class to a dictionary, along with allowing
    # certain fields to be left out of output file
    def __iter__(self):
        """Yield (name, value) pairs so that dict(step_metadata) works.

        depth_map_list, image_list, and object_mask_list are intentionally
        never yielded. The goal is yielded as a dict (or None when unset),
        and the two object lists are yielded as uuid-keyed dicts.
        """
        yield 'action_list', self.action_list
        yield 'camera_aspect_ratio', self.camera_aspect_ratio
        yield 'camera_clipping_planes', self.camera_clipping_planes
        yield 'camera_field_of_view', self.camera_field_of_view
        yield 'camera_height', self.camera_height
        # Intentionally no depth_map_list
        # The goal is documented as possibly None, and dict(None) would
        # raise a TypeError, so pass None through unchanged.
        yield 'goal', None if self.goal is None else dict(self.goal)
        yield 'habituation_trial', self.habituation_trial
        yield 'haptic_feedback', self.haptic_feedback
        yield 'head_tilt', self.head_tilt
        yield 'holes', self.holes
        # Intentionally no image_list
        yield 'lava', self.lava
        yield 'object_list', self.check_list_none(self.object_list)
        # Intentionally no object_mask_list
        yield 'performer_radius', self.performer_radius
        yield 'performer_reach', self.performer_reach
        yield 'physics_frames_per_second', self.physics_frames_per_second
        yield 'position', self.position
        yield 'resolved_object', self.resolved_object
        yield 'resolved_receptacle', self.resolved_receptacle
        yield 'return_status', self.return_status
        yield 'room_dimensions', self.room_dimensions
        yield 'reward', self.reward
        yield 'rotation', self.rotation
        yield 'segmentation_colors', self.segmentation_colors
        yield 'step_number', self.step_number
        yield 'steps_on_lava', self.steps_on_lava
        yield 'structural_object_list', self.check_list_none(
            self.structural_object_list)
        yield 'triggered_by_sequence_incorrect', \
            self.triggered_by_sequence_incorrect