isaac-sim · n7729697 · Apr 29, 2025 · Apr 29, 2025 · Apr 29, 2025
@@ -4,7 +4,6 @@
     "version": "2.0.0",
     "tasks": [
         {
-            // setup python env
             "label": "setup_python_env",
             "type": "shell",
             "linux": {
@@ -15,15 +14,15 @@
             }
         },
         {
-            // run formatter
             "label": "run_formatter",
             "type": "shell",
             "linux": {
                 "command": "${workspaceFolder}/isaaclab.sh --format"
             },
             "windows": {
                 "command": "${workspaceFolder}/isaaclab.bat --format"
-            }
+            },
+            "problemMatcher": []
         }
     ]
 }
diff --git a/=1.4.2 b/=1.4.2
@@ -0,0 +1,27 @@
+Requirement already satisfied: skrl in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (1.4.1)
+Collecting skrl
+  Downloading skrl-1.4.2-py3-none-any.whl.metadata (4.8 kB)
+Requirement already satisfied: gymnasium in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from skrl) (1.0.0)
+Requirement already satisfied: packaging in /home/xuezhi/.local/lib/python3.10/site-packages (from skrl) (24.2)
+Requirement already satisfied: tensorboard in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from skrl) (2.18.0)
+Requirement already satisfied: tqdm in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from skrl) (4.67.1)
+Requirement already satisfied: numpy>=1.21.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (1.26.4)
+Requirement already satisfied: cloudpickle>=1.2.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (3.1.1)
+Requirement already satisfied: typing-extensions>=4.3.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (4.12.2)
+Requirement already satisfied: farama-notifications>=0.0.1 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from gymnasium->skrl) (0.0.4)
+Requirement already satisfied: absl-py>=0.4 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (2.1.0)
+Requirement already satisfied: grpcio>=1.48.2 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (1.70.0)
+Requirement already satisfied: markdown>=2.6.8 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (3.7)
+Requirement already satisfied: protobuf!=4.24.0,>=3.19.6 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (3.20.3)
+Requirement already satisfied: setuptools>=41.0.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (75.8.0)
+Requirement already satisfied: six>1.9 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (1.17.0)
+Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (0.7.2)
+Requirement already satisfied: werkzeug>=1.0.1 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from tensorboard->skrl) (3.1.3)
+Requirement already satisfied: MarkupSafe>=2.1.1 in /home/xuezhi/miniconda3/envs/env_isaaclab/lib/python3.10/site-packages (from werkzeug>=1.0.1->tensorboard->skrl) (3.0.2)
+Downloading skrl-1.4.2-py3-none-any.whl (403 kB)
+Installing collected packages: skrl
+  Attempting uninstall: skrl
+    Found existing installation: skrl 1.4.1
+    Uninstalling skrl-1.4.1:
+      Successfully uninstalled skrl-1.4.1
+Successfully installed skrl-1.4.2
diff --git a/materials/textures/joint1.png b/materials/textures/joint1.png
diff --git a/materials/textures/joint2.png b/materials/textures/joint2.png
diff --git a/materials/textures/joint3.png b/materials/textures/joint3.png
diff --git a/materials/textures/joint4.png b/materials/textures/joint4.png
diff --git a/materials/textures/joint6.png b/materials/textures/joint6.png
diff --git a/materials/textures/joint7.png b/materials/textures/joint7.png
@@ -52,6 +52,7 @@
     KINOVA_JACO2_N6S300_CFG,
     KINOVA_GEN3_N7_CFG,
     SAWYER_CFG,
+    RIDGEBACK_FRANKA_PANDA_CFG,
 )
 
 # isort: on
@@ -83,7 +84,7 @@ def design_scene() -> tuple[dict, list[list[float]]]:
 
     # Create separate groups called "Origin1", "Origin2", "Origin3"
     # Each group will have a mount and a robot on top of it
-    origins = define_origins(num_origins=6, spacing=2.0)
+    origins = define_origins(num_origins=7, spacing=2.0)
 
     # Origin 1 with Franka Panda
     prim_utils.create_prim("/World/Origin1", "Xform", translation=origins[0])
@@ -149,6 +150,14 @@ def design_scene() -> tuple[dict, list[list[float]]]:
     sawyer_arm_cfg.init_state.pos = (0.0, 0.0, 1.03)
     sawyer = Articulation(cfg=sawyer_arm_cfg)
 
+    # Origin 7 with mobile Franka (Ridgeback base + Franka Panda arm)
+    prim_utils.create_prim("/World/Origin7", "Xform", translation=origins[6])
+    # -- Robot
+    mobileFranka_cfg = RIDGEBACK_FRANKA_PANDA_CFG.replace(prim_path="/World/Origin7/Robot")
+    mobileFranka_cfg.init_state.pos = (0.0, 0.0, 0.0)
+    mobileFranka_cfg.init_state.rot = (0.7071068, 0.0, 0.7071068, 0.0)
+    mobileFranka = Articulation(cfg=mobileFranka_cfg)
+
     # return the scene information
     scene_entities = {
         "franka_panda": franka_panda,
@@ -157,6 +166,7 @@ def design_scene() -> tuple[dict, list[list[float]]]:
         "kinova_j2n6s300": kinova_j2n6s300,
         "kinova_gen3n7": kinova_gen3n7,
         "sawyer": sawyer,
+        "mobile_franka": mobileFranka,
     }
     return scene_entities, origins
 

@@ -0,0 +1,8 @@
+{
+	// See http://go.microsoft.com/fwlink/?LinkId=827846
+	// for the documentation about the extensions.json format
+	"recommendations": [
+		"ms-python.python",
+		"ms-python.vscode-pylance"
+	],
+}
@@ -0,0 +1,25 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Attach (windows-x86_64/linux-x86_64)",
+            "type": "python",
+            "request": "attach",
+            "pathMappings": [
+            {
+                "localRoot": "${workspaceFolder}",
+                "remoteRoot": "${workspaceFolder}"
+            }],
+            "port": 3000,
+            "host": "127.0.0.1",
+            "subProcess": true,
+            "runtimeArgs": [
+                "--preserve-symlinks",
+                "--preserve-symlinks-main"
+            ]
+        }
+    ]
+}
@@ -0,0 +1,10 @@
+{
+    "editor.rulers": [120],
+    "python.languageServer": "Pylance",
+    "python.formatting.provider": "black",
+    "python.formatting.blackArgs": ["--line-length", "120"],
+    "python.linting.enabled": true,
+    "python.linting.pylintEnabled": false,
+    "python.linting.flake8Enabled": true,
+    "python.linting.flake8Args": ["--max-line-length=120"]
+}
@@ -17,6 +17,7 @@
 from .humanoid import *
 from .humanoid_28 import *
 from .kinova import *
+from .mobile_franka import *
 from .quadcopter import *
 from .ridgeback_franka import *
 from .sawyer import *

@@ -0,0 +1,82 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import isaaclab.sim as sim_utils
+from isaaclab.actuators import ImplicitActuatorCfg
+from isaaclab.assets import ArticulationCfg
+from isaaclab.utils.assets import ISAACLAB_NUCLEUS_DIR
+
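+##
+# Configuration
+#
+# NOTE(review): `usd_path` below is an absolute path on a developer machine
+# (/home/xuezhi/Downloads/...), so this config only loads on that machine.
+# ISAACLAB_NUCLEUS_DIR is imported above but not used in this file -- the asset
+# presumably needs to be moved to a shared location and referenced from there
+# (TODO confirm the intended asset location).
+##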
+
+MOBILE_FRANKA_CFG = ArticulationCfg(
+    spawn=sim_utils.UsdFileCfg(
+        usd_path=f"/home/xuezhi/Downloads/ridgeback_franka6_instanceable.usd",
+        rigid_props=sim_utils.RigidBodyPropertiesCfg(
+            rigid_body_enabled=True,
+            max_linear_velocity=1000.0,
+            max_angular_velocity=1000.0,
+            max_depenetration_velocity=100.0,
+            enable_gyroscopic_forces=True,
+        ),
+        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
+            enabled_self_collisions=False,
+            solver_position_iteration_count=12,
+            solver_velocity_iteration_count=1,
+            sleep_threshold=0.005,
+            stabilization_threshold=0.001,
+        ),
+    ),
+    init_state=ArticulationCfg.InitialStateCfg(
+        joint_pos={
+            # base
+            "dummy_base_prismatic_x_joint": 0.0,
+            "dummy_base_prismatic_y_joint": 0.0,
+            "dummy_base_revolute_z_joint": 0.0,
+            # franka_panda
+            "panda_joint1": 0.0,
+            "panda_joint2": -1.0,
+            "panda_joint3": 0.0,
+            "panda_joint4": -2.2,
+            "panda_joint5": 0.0,
+            "panda_joint6": 2.4,
+            "panda_joint7": 0.8,
+            "panda_finger_joint1": 0.035,
+            "panda_finger_joint2": 0.035,
+        },
+        joint_vel={".*": 0.0},
+    ),
+    actuators={
+        "arm_actuators": ImplicitActuatorCfg(
+            joint_names_expr=["panda_joint[1-7]"],
+            effort_limit=87.0,
+            velocity_limit=2.175,
+            stiffness=400.0,
+            damping=80.0,
+        ),
+        "gripper_actuators": ImplicitActuatorCfg(
+            joint_names_expr=["panda_finger_joint1", "panda_finger_joint2"],
+            effort_limit=200.0,
+            velocity_limit=0.2,
+            stiffness=100000.0,
+            damping=1000.0,
+        ),
+        "base_actuators": ImplicitActuatorCfg(
+            joint_names_expr=["dummy_base_prismatic_x_joint", "dummy_base_prismatic_y_joint"],
+            effort_limit=1000.0,
+            velocity_limit=100.0,  # Assuming position control
+            stiffness=999999986991104.0,
+            damping=100000.0,
+        ),
+        "base_rot_actuators": ImplicitActuatorCfg(
+            joint_names_expr=["dummy_base_revolute_z_joint"],
+            effort_limit=1000.0,
+            velocity_limit=100.0,  # Assuming position control
+            stiffness=17453292716032.0,
+            damping=1745.32922,
+        ),
+    },
+)
@@ -24,14 +24,15 @@
 RIDGEBACK_FRANKA_PANDA_CFG = ArticulationCfg(
     spawn=sim_utils.UsdFileCfg(
         usd_path=f"{ISAAC_NUCLEUS_DIR}/Robots/Clearpath/RidgebackFranka/ridgeback_franka.usd",
+        # usd_path=f"/home/xuezhi/Downloads/ridgeback_franka6_instanceable.usd",
         articulation_props=sim_utils.ArticulationRootPropertiesCfg(enabled_self_collisions=False),
         activate_contact_sensors=False,
     ),
     init_state=ArticulationCfg.InitialStateCfg(
         joint_pos={
             # base
-            "dummy_base_prismatic_y_joint": 0.0,
             "dummy_base_prismatic_x_joint": 0.0,
+            "dummy_base_prismatic_y_joint": 0.0,
             "dummy_base_revolute_z_joint": 0.0,
             # franka arm
             "panda_joint1": 0.0,

@@ -1,5 +1,4 @@
-seed: 42
-
+seed: 500
 
 # Models are instantiated using skrl's model instantiator utility
 # https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
@@ -78,5 +77,5 @@ agent:
 # https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
 trainer:
   class: SequentialTrainer
-  timesteps: 4800
+  timesteps: 6000
   environment_info: log
@@ -53,14 +53,14 @@ class CartDoublePendulumEnvCfg(DirectMARLEnvCfg):
     pendulum_action_scale = 50.0  # [Nm]
 
     # reward scales
-    rew_scale_alive = 1.0
-    rew_scale_terminated = -2.0
-    rew_scale_cart_pos = 0
-    rew_scale_cart_vel = -0.01
-    rew_scale_pole_pos = -1.0
-    rew_scale_pole_vel = -0.01
-    rew_scale_pendulum_pos = -1.0
-    rew_scale_pendulum_vel = -0.01
+    eps_alive = 1.0
+    eps_terminated = -2.0
+    eps_cart_pos = 0
+    eps_cart_vel = -0.01
+    eps_pole_pos = -1.0
+    eps_pole_vel = -0.01
+    eps_pendulum_pos = -1.0
+    eps_pendulum_vel = -0.01
 
 
 class CartDoublePendulumEnv(DirectMARLEnv):
@@ -124,23 +124,29 @@ def _get_observations(self) -> dict[str, torch.Tensor]:
         return observations
 
     def _get_rewards(self) -> dict[str, torch.Tensor]:
-        total_reward = compute_rewards(
-            self.cfg.rew_scale_alive,
-            self.cfg.rew_scale_terminated,
-            self.cfg.rew_scale_cart_pos,
-            self.cfg.rew_scale_cart_vel,
-            self.cfg.rew_scale_pole_pos,
-            self.cfg.rew_scale_pole_vel,
-            self.cfg.rew_scale_pendulum_pos,
-            self.cfg.rew_scale_pendulum_vel,
-            self.joint_pos[:, self._cart_dof_idx[0]],
-            self.joint_vel[:, self._cart_dof_idx[0]],
-            normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]),
-            self.joint_vel[:, self._pole_dof_idx[0]],
-            normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]),
-            self.joint_vel[:, self._pendulum_dof_idx[0]],
-            math.prod(self.terminated_dict.values()),
+        P_cart_0, P_pendulum_0, Delta_P_cart, Delta_P_pendulum, total_reward = compute_rewards(
+            1.0,  # alpha
+            1.0,  # beta
+            self.cfg.eps_alive,  # eps_alive
+            self.cfg.eps_terminated,  # eps_terminated
+            self.cfg.eps_cart_vel,  # eps_cart_vel
+            self.cfg.eps_pole_pos,  # eps_pole_pos
+            self.cfg.eps_pole_vel,  # eps_pole_vel
+            self.cfg.eps_pendulum_pos,  # eps_pendulum_pos
+            self.cfg.eps_pendulum_vel,  # eps_pendulum_vel
+            self.joint_vel[:, self._cart_dof_idx[0]],  # cart_vel
+            normalize_angle(self.joint_pos[:, self._pole_dof_idx[0]]),  # pole_pos
+            self.joint_vel[:, self._pole_dof_idx[0]],  # pole_vel
+            normalize_angle(self.joint_pos[:, self._pendulum_dof_idx[0]]),  # pendulum_pos
+            self.joint_vel[:, self._pendulum_dof_idx[0]],  # pendulum_vel
+            math.prod(self.terminated_dict.values()),  # reset_terminated
         )
+        if "log" not in self.extras:
+            self.extras["log"] = dict()
+        self.extras["log"]["P_cart_0"] = P_cart_0.mean()
+        self.extras["log"]["P_pendulum_0"] = P_pendulum_0.mean()
+        self.extras["log"]["Delta_P_cart"] = Delta_P_cart.mean()
+        self.extras["log"]["Delta_P_pendulum"] = Delta_P_pendulum.mean()
         return total_reward
 
     def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
@@ -193,34 +199,44 @@ def normalize_angle(angle):
 
 @torch.jit.script
 def compute_rewards(
-    rew_scale_alive: float,
-    rew_scale_terminated: float,
-    rew_scale_cart_pos: float,
-    rew_scale_cart_vel: float,
-    rew_scale_pole_pos: float,
-    rew_scale_pole_vel: float,
-    rew_scale_pendulum_pos: float,
-    rew_scale_pendulum_vel: float,
-    cart_pos: torch.Tensor,
+    alpha: float,
+    beta: float,
+    eps_alive: float,
+    eps_terminated: float,
+    eps_cart_vel: float,
+    eps_pole_pos: float,
+    eps_pole_vel: float,
+    eps_pendulum_pos: float,
+    eps_pendulum_vel: float,
     cart_vel: torch.Tensor,
     pole_pos: torch.Tensor,
     pole_vel: torch.Tensor,
     pendulum_pos: torch.Tensor,
     pendulum_vel: torch.Tensor,
     reset_terminated: torch.Tensor,
 ):
-    rew_alive = rew_scale_alive * (1.0 - reset_terminated.float())
-    rew_termination = rew_scale_terminated * reset_terminated.float()
-    rew_pole_pos = rew_scale_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1)
-    rew_pendulum_pos = rew_scale_pendulum_pos * torch.sum(
-        torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1
+    # Base reward components
+    P_cart_0 = (
+        eps_alive * (1.0 - reset_terminated.float())
+        + eps_terminated * reset_terminated.float()
+        + eps_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
     )
-    rew_cart_vel = rew_scale_cart_vel * torch.sum(torch.abs(cart_vel).unsqueeze(dim=1), dim=-1)
-    rew_pole_vel = rew_scale_pole_vel * torch.sum(torch.abs(pole_vel).unsqueeze(dim=1), dim=-1)
-    rew_pendulum_vel = rew_scale_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1)
-
-    total_reward = {
-        "cart": rew_alive + rew_termination + rew_pole_pos + rew_cart_vel + rew_pole_vel,
-        "pendulum": rew_alive + rew_termination + rew_pendulum_pos + rew_pendulum_vel,
-    }
-    return total_reward
+
+    P_pendulum_0 = eps_alive * (1.0 - reset_terminated.float()) + eps_terminated * reset_terminated.float()
+
+    # Cooperative (mutualistic) terms
+    Delta_P_cart = eps_pole_pos * torch.sum(torch.square(pole_pos).unsqueeze(dim=1), dim=-1) + eps_pole_vel * torch.sum(
+        torch.abs(pole_vel).unsqueeze(dim=1), dim=-1
+    )
+
+    Delta_P_pendulum = eps_pendulum_pos * torch.sum(
+        torch.square(pole_pos + pendulum_pos).unsqueeze(dim=1), dim=-1
+    ) + eps_pendulum_vel * torch.sum(torch.abs(pendulum_vel).unsqueeze(dim=1), dim=-1)
+
+    # Final rewards incorporating mutualistic principles
+    R_cart = alpha * P_cart_0 + beta * Delta_P_cart
+    R_pendulum = alpha * P_pendulum_0 + beta * Delta_P_pendulum
+
+    total_reward = {"cart": R_cart, "pendulum": R_pendulum}
+
+    return P_cart_0, P_pendulum_0, Delta_P_cart, Delta_P_pendulum, total_reward
@@ -0,0 +1,26 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Mobile Franka multi-agent (MARL) environment.
+"""
+
+import gymnasium as gym
+
+from . import agents
+
+##
+# Register Gym environments.
+##
+
+gym.register(
+    id="MobileFrankaMARL",
+    entry_point=f"{__name__}.mobile_franka_marl:MobileFrankaEnv",
+    disable_env_checker=True,
+    kwargs={
+        "env_cfg_entry_point": f"{__name__}.mobile_franka_marl_cfg:MobileFrankaMARLCfg",
+        "skrl_mappo_cfg_entry_point": f"{agents.__name__}:skrl_mappo_cfg.yaml",
+    },
+)
@@ -0,0 +1,4 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
@@ -0,0 +1,82 @@
+seed: 42
+
+# Models are instantiated using skrl's model instantiator utility
+# https://skrl.readthedocs.io/en/latest/api/utils/model_instantiators.html
+
+models:
+  separate: True
+  policy:  # see gaussian_model parameters
+    class: GaussianMixin
+    clip_actions: False
+    clip_log_std: True
+    min_log_std: -20.0
+    max_log_std: 2.0
+    initial_log_std: 0.0
+    network:
+      - name: net
+        input: STATES
+        layers: [512, 512, 256, 128]
+        activations: elu
+    output: ACTIONS
+  value:  # see deterministic_model parameters
+    class: DeterministicMixin
+    clip_actions: False
+    network:
+      - name: net
+        input: STATES
+        layers: [512, 512, 256, 128]
+        activations: elu
+    output: ONE
+
+
+# Rollout memory
+# https://skrl.readthedocs.io/en/latest/api/memories/random.html
+memory:
+  class: RandomMemory
+  memory_size: -1  # automatically determined (same as agent:rollouts)
+
+
+# MAPPO agent configuration (field names are from MAPPO_DEFAULT_CONFIG)
+# https://skrl.readthedocs.io/en/latest/api/multi_agents/mappo.html
+agent:
+  class: MAPPO
+  rollouts: 16
+  learning_epochs: 5
+  mini_batches: 4
+  discount_factor: 0.99
+  lambda: 0.95
+  learning_rate: 5.0e-04
+  learning_rate_scheduler: KLAdaptiveLR
+  learning_rate_scheduler_kwargs:
+    kl_threshold: 0.016
+  state_preprocessor: RunningStandardScaler
+  state_preprocessor_kwargs: null
+  shared_state_preprocessor: RunningStandardScaler
+  shared_state_preprocessor_kwargs: null
+  value_preprocessor: RunningStandardScaler
+  value_preprocessor_kwargs: null
+  random_timesteps: 0
+  learning_starts: 0
+  grad_norm_clip: 1.0
+  ratio_clip: 0.2
+  value_clip: 0.2
+  clip_predicted_values: True
+  entropy_loss_scale: 0.0
+  value_loss_scale: 2.0
+  kl_threshold: 0.0
+  rewards_shaper_scale: 1.0
+  time_limit_bootstrap: False
+  # logging and checkpoint
+  experiment:
+    directory: "mobile_franka_mappo"
+    experiment_name: ""
+    write_interval: auto
+    checkpoint_interval: auto
+
+
+# Sequential trainer
+# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
+trainer:
+  class: SequentialTrainer
+  timesteps: 36000
+  environment_info: log
@@ -0,0 +1,148 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers.
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+from isaaclab_assets.robots.mobile_franka import MOBILE_FRANKA_CFG
+from isaaclab_assets.robots.ridgeback_franka import RIDGEBACK_FRANKA_PANDA_CFG
+
+import isaaclab.sim as sim_utils
+from isaaclab.assets import ArticulationCfg, RigidObjectCfg
+from isaaclab.envs import DirectMARLEnvCfg
+from isaaclab.markers import VisualizationMarkersCfg
+from isaaclab.scene import InteractiveSceneCfg
+from isaaclab.sim import PhysxCfg, SimulationCfg
+from isaaclab.sim.spawners.materials.physics_materials_cfg import RigidBodyMaterialCfg
+
+# from isaaclab.managers import EventTermCfg as EventTerm, SceneEntityCfg
+from isaaclab.utils import configclass
+
+
+@configclass
+class MobileFrankaMARLCfg(DirectMARLEnvCfg):
+    # Environment settings
+    decimation = 2
+    episode_length_s = 500 / (120 / 2)  # Adjusted for control frequency
+    possible_agents = ["franka", "base"]
+    action_spaces = {"franka": 7, "base": 3}
+    observation_spaces = {"franka": 40, "base": 40}
+    state_space = -1
+
+    # Simulation settings
+    sim: SimulationCfg = SimulationCfg(
+        dt=1 / 120,
+        render_interval=decimation,
+        gravity=(0.0, 0.0, -9.81),
+        physics_material=RigidBodyMaterialCfg(
+            static_friction=1.0,
+            dynamic_friction=1.0,
+        ),
+        physx=PhysxCfg(
+            # solver_position_iteration_count=12,
+            # solver_velocity_iteration_count=6,
+            # contact_offset=0.005,
+            # rest_offset=0.0,
+            bounce_threshold_velocity=0.2,
+            # enable_sleeping=True,
+            # max_depenetration_velocity=1000.0,
+        ),
+    )
+
+    # Robot configuration
+    mobile_franka_cfg: ArticulationCfg = MOBILE_FRANKA_CFG.replace(prim_path="/World/envs/env_.*/MobileFranka").replace(
+        init_state=ArticulationCfg.InitialStateCfg(
+            pos=(0.0, 0.0, 0.0),
+            # rot=(0.7071068, 0.0, 0.7071068, 0.0),
+            # rot=(1.0, 0.0, 0.0, 0.0),
+            joint_pos={".*": 0.0},
+        ),
+        # solver_position_iteration_count=12,
+        # solver_velocity_iteration_count=1,
+        # enable_self_collisions=False,
+        # enable_gyroscopic_forces=True,
+    )
+
+    actuated_joint_names = [
+        "panda_joint1",
+        "panda_joint2",
+        "panda_joint3",
+        "panda_joint4",
+        "panda_joint5",
+        "panda_joint6",
+        "panda_joint7",
+    ]
+
+    mobile_base_names = [
+        "dummy_base_prismatic_x_joint",
+        "dummy_base_prismatic_y_joint",
+        "dummy_base_revolute_z_joint",
+    ]
+
+    xy_base_names = [
+        "dummy_base_prismatic_x_joint",
+        "dummy_base_prismatic_y_joint",
+    ]
+
+    finger_joint_names = [
+        "panda_finger_joint1",
+        "panda_finger_joint2",
+    ]
+
+    finger_body_names = [
+        "panda_leftfinger",
+        # "panda_finger2",
+    ]
+
+    # object configuration
+    # target_cube_cfg: RigidObjectCfg = RigidObjectCfg(
+    #     prim_path="/World/envs/env_.*/object",
+    #     spawn=sim_utils.SphereCfg(
+    #         radius=0.1,
+    #         visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(1.0, 0.0, 0.0)),
+    #         physics_material=sim_utils.RigidBodyMaterialCfg(static_friction=0.7),
+    #         rigid_props=sim_utils.RigidBodyPropertiesCfg(
+    #             kinematic_enabled=False,
+    #             disable_gravity=False,
+    #             enable_gyroscopic_forces=True,
+    #             solver_position_iteration_count=8,
+    #             solver_velocity_iteration_count=0,
+    #             sleep_threshold=0.005,
+    #             stabilization_threshold=0.0025,
+    #             max_depenetration_velocity=1000.0,
+    #         ),
+    #         collision_props=sim_utils.CollisionPropertiesCfg(),
+    #         mass_props=sim_utils.MassPropertiesCfg(density=500.0),
+    #     ),
+    #     init_state=RigidObjectCfg.InitialStateCfg(pos=(2.0, 0.0, 0.5), rot=(1.0, 0.0, 0.0, 0.0)),
+    # )
+    # goal object
+    goal_object_cfg: VisualizationMarkersCfg = VisualizationMarkersCfg(
+        prim_path="/Visuals/goal_marker",
+        markers={
+            "goal": sim_utils.SphereCfg(
+                radius=0.1,
+                visual_material=sim_utils.PreviewSurfaceCfg(diffuse_color=(1.0, 0.0, 0.0)),
+            ),
+        },
+    )
+
+    # Scene settings
+    scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=512, env_spacing=3.0, replicate_physics=True)
+
+    action_scale = 7.5
+    dof_velocity_scale = 0.1
+    max_base_pos = 3.0
+
+    # Reward scales
+    dist_reward_scale = 20
+    rot_reward_scale = 0.5
+    around_handle_reward_scale = 10.0
+    open_reward_scale = 7.5
+    finger_dist_reward_scale = 100.0
+    action_penalty_scale = 0.01
+    finger_close_reward_scale = 10.0
+    act_moving_average = 1.0
+    # Reset noise
+    reset_position_noise = 0.0
+    reset_dof_pos_noise = 0.0
+    reset_dof_vel_noise = 0.0
@@ -1,4 +1,4 @@
-seed: 42
+seed: 500
 
 
 # Models are instantiated using skrl's model instantiator utility

@@ -408,6 +408,17 @@ def _compute_intermediate_values(self):
 
 @torch.jit.script
 def scale(x, lower, upper):
+    """
+    Scales the input tensor x from the range [-1, 1] to the range [lower, upper].
+
+    Args:
+        x (torch.Tensor): Input tensor to be scaled.
+        lower (torch.Tensor): Lower bound of the target range.
+        upper (torch.Tensor): Upper bound of the target range.
+
+    Returns:
+        torch.Tensor: Scaled tensor.
+    """
     return 0.5 * (x + 1.0) * (upper - lower) + lower
 
 
@@ -418,6 +429,18 @@ def unscale(x, lower, upper):
 
 @torch.jit.script
 def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor):
+    """
+    Randomizes the rotation based on random values and unit tensors.
+
+    Args:
+        rand0 (torch.Tensor): Random values; multiplied by pi to give the rotation angle about x_unit_tensor.
+        rand1 (torch.Tensor): Random values; multiplied by pi to give the rotation angle about y_unit_tensor.
+        x_unit_tensor (torch.Tensor): Unit tensor for the x-axis.
+        y_unit_tensor (torch.Tensor): Unit tensor for the y-axis.
+
+    Returns:
+        torch.Tensor: The resulting quaternion after applying the random rotations.
+    """
     return quat_mul(
         quat_from_angle_axis(rand0 * np.pi, x_unit_tensor), quat_from_angle_axis(rand1 * np.pi, y_unit_tensor)
     )
@@ -51,6 +51,7 @@ def load_cfg_from_registry(task_name: str, entry_point_key: str) -> dict | objec
     Raises:
         ValueError: If the entry point key is not available in the gym registry for the task.
     """
+
     # obtain the configuration entry point
     cfg_entry_point = gym.spec(task_name).kwargs.get(entry_point_key)
     # check if entry point exists