refactor: use mujoco to replace genesis

build: make environment adapt to Nixos
build: deps and environment change
2026-01-29 11:27:49 +08:00 · 2025-12-08 16:44:16 +08:00 · 2025-12-08 16:04:06 +08:00 · 2025-12-08 15:38:00 +08:00
6 changed files with 1237 additions and 1818 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -182,9 +182,9 @@ cython_debug/
 .abstra/
 # Visual Studio Code
-#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
 #  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
-#  and can be added to the global gitignore or merged into this file. However, if you prefer, 
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
 #  you could uncomment the following to ignore the entire vscode folder
 # .vscode/
@@ -216,3 +216,14 @@ devenv.local.yaml
 # pre-commit
 .pre-commit-config.yaml
 # Devenv
 .devenv*
 devenv.local.nix
 devenv.local.yaml
 # direnv
 .direnv
 # pre-commit
 .pre-commit-config.yaml
--- a/devenv.lock
+++ b/devenv.lock
@@ -3,10 +3,10 @@
    "devenv": {
      "locked": {
        "dir": "src/modules",
-        "lastModified": 1764449550,
+        "lastModified": 1764927628,
        "owner": "cachix",
        "repo": "devenv",
-        "rev": "dfb58ac03bed07b93f629df55034bc50394d3971",
+        "rev": "247d7027f91368054fb0eefbd755a73d42b66fee",
        "type": "github"
      },
      "original": {
@@ -19,10 +19,10 @@
    "flake-compat": {
      "flake": false,
      "locked": {
-        "lastModified": 1761588595,
+        "lastModified": 1765121682,
        "owner": "edolstra",
        "repo": "flake-compat",
-        "rev": "f387cd2afec9419c8ee37694406ca490c3f34ee5",
+        "rev": "65f23138d8d09a92e30f1e5c87611b23ef451bf3",
        "type": "github"
      },
      "original": {
@@ -34,10 +34,10 @@
    "flake-compat_2": {
      "flake": false,
      "locked": {
-        "lastModified": 1761588595,
+        "lastModified": 1765121682,
        "owner": "edolstra",
        "repo": "flake-compat",
-        "rev": "f387cd2afec9419c8ee37694406ca490c3f34ee5",
+        "rev": "65f23138d8d09a92e30f1e5c87611b23ef451bf3",
        "type": "github"
      },
      "original": {
@@ -55,10 +55,10 @@
        ]
      },
      "locked": {
-        "lastModified": 1763988335,
+        "lastModified": 1765016596,
        "owner": "cachix",
        "repo": "git-hooks.nix",
-        "rev": "50b9238891e388c9fdc6a5c49e49c42533a1b5ce",
+        "rev": "548fc44fca28a5e81c5d6b846e555e6b9c2a5a3c",
        "type": "github"
      },
      "original": {
@@ -89,10 +89,10 @@
    },
    "nixpkgs": {
      "locked": {
-        "lastModified": 1761313199,
+        "lastModified": 1764580874,
        "owner": "cachix",
        "repo": "devenv-nixpkgs",
-        "rev": "d1c30452ebecfc55185ae6d1c983c09da0c274ff",
+        "rev": "dcf61356c3ab25f1362b4a4428a6d871e84f1d1d",
        "type": "github"
      },
      "original": {
@@ -108,10 +108,10 @@
        "nixpkgs": "nixpkgs_2"
      },
      "locked": {
-        "lastModified": 1763677049,
+        "lastModified": 1765052656,
        "owner": "cachix",
        "repo": "nixpkgs-python",
-        "rev": "159d63dc49a4b12bf85fe0e83011a8b69ba1bcb0",
+        "rev": "04b27dbad2e004cb237db202f21154eea3c4f89f",
        "type": "github"
      },
      "original": {
@@ -122,10 +122,10 @@
    },
    "nixpkgs_2": {
      "locked": {
-        "lastModified": 1764316264,
+        "lastModified": 1764939437,
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "9a7b80b6f82a71ea04270d7ba11b48855681c4b0",
+        "rev": "00d2457e2f608b4be6fe8b470b0a36816324b0ae",
        "type": "github"
      },
      "original": {
--- a/devenv.nix
+++ b/devenv.nix
@@ -6,8 +6,8 @@
  ...
 }: {
  packages = with pkgs; [
-    glxinfo
+    mujoco
-    vulkan-tools
+    linuxHeaders
  ];
  languages.c.enable = true;
@@ -55,30 +55,33 @@
      );
  };
-  env = rec {
+  env = {
-    NIX_LD_LIBRARY_PATH = "$NIX_LD_LIBRARY_PATH:/usr/lib/wsl/lib/";
+    CPATH = "${pkgs.linuxHeaders}/include";
-    LD_LIBRARY_PATH = NIX_LD_LIBRARY_PATH;
+    KERNEL_DIR = "${pkgs.linuxHeaders}/include";
    GLVND = "${pkgs.libglvnd}";
    MESA = "${pkgs.mesa}";
    MESAD = MESA;
    GALLIUM_DRIVER = "d3d12";
    MESA_D3D12_DEFAULT_ADAPTER_NAME = "NVIDIA";
    # LIBGL_ALWAYS_INDIRECT = 0;
    # __GLX_VENDOR_LIBRARY_NAME = "nvidia";
    # __NV_PRIME_RENDER_OFFLOAD = 1;
    # __VK_LAYER_NV_optimus = "NVIDIA_only";
    # PYOPENGL_PLATFORM = "egl";
    # EGL_PLATFORM = "wayland";
    # __EGL_VENDOR_LIBRARY_DIRS = "${MESA}/share/glvnd/egl_vendor.d:${GLVND}/share/glvnd/egl_vendor.d";
    # LIBGL_DRIVERS_PATH = "${MESA}/lib/dri";
  };
  # Create a .venv symlink pointing to .devenv/state/venv
  scripts.linkVenv.exec = ''
    if [ -L ".venv" ] || [ ! -e ".venv" ]; then
        rm -f ".venv" 2>/dev/null || true
        ln -s ".devenv/state/venv" ".venv"
        echo "Created symlink: .venv -> .devenv/state/venv"
    elif [ -e ".venv" ]; then
        echo "Warning: .venv exists and is not a symlink. Not replacing it."
        echo "If you want to replace it with a symlink, please remove it manually first."
    fi
  '';
  # https://devenv.sh/basics/
  enterShell = ''
-    git --version # Use packages
+    export NIX_LD_LIBRARY_PATH="$NIX_LD_LIBRARY_PATH:${lib.makeLibraryPath [ pkgs.linuxHeaders ]}";
    export LD_LIBRARY_PATH="$NIX_LD_LIBRARY_PATH";
    echo "NIX_LD_LIBRARY_PATH set to $NIX_LD_LIBRARY_PATH"
    echo "LD_LIBRARY_PATH set to $LD_LIBRARY_PATH"
    echo "CPATH set to $CPATH"
    echo "KERNEL_DIR set to $KERNEL_DIR"
    linkVenv
  '';
 }
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,8 +5,14 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
    "accelerate>=1.12.0",
    "mujoco>=3.4.0",
    "pillow>=12.0.0",
    "robosuite>=1.5.1",
    "robosuite-models>=1.0.0",
    "torch>=2.9.1",
    "torchvision>=0.24.1",
    "transformers>=4.57.3",
 ]
 [dependency-groups]
--- a/src/main.py
+++ b/src/main.py
@@ -1,126 +1,194 @@
-from typing import cast
+"""
 Simple OpenVLA Demo with Robosuite + MuJoCo
 Uses the official OpenVLA model from HuggingFace
 """
 import genesis as gs
 import numpy as np
-
+import robosuite as suite
-########################## init ##########################
+import torch
-gs.init(backend=gs.gs_backend.gpu)
+from PIL import Image
-
+from transformers import AutoModelForVision2Seq, AutoProcessor
 try:
    from genesis.engine.entities import RigidEntity
 except ImportError:
    raise ImportError("genesis.engine.entities.RigidEntity is not available")
 ########################## create a scene ##########################
 scene = gs.Scene(
    viewer_options=gs.options.ViewerOptions(
        camera_pos=(0, -3.5, 2.5),
        camera_lookat=(0.0, 0.0, 0.5),
        camera_fov=30,
        res=(960, 640),
        max_FPS=60,
    ),
    sim_options=gs.options.SimOptions(
        dt=0.01,
    ),
    show_viewer=True,
 )
 ########################## entities ##########################
 plane = scene.add_entity(
    gs.morphs.Plane(),
 )
 franka = scene.add_entity(
    gs.morphs.MJCF(
        file="xml/franka_emika_panda/panda.xml",
    ),
 )
 franka = cast(RigidEntity, franka)
-########################## build ##########################
+class OpenVLADemo:
-scene.build()
+    """OpenVLA Demo with Robosuite using HuggingFace model"""
-jnt_names = [
+    # OpenVLA action names (7-DoF)
-    "joint1",
+    ACTION_NAMES = [
-    "joint2",
+        "delta_x",
-    "joint3",
+        "delta_y",
-    "joint4",
+        "delta_z",
-    "joint5",
+        "delta_roll",
-    "joint6",
+        "delta_pitch",
-    "joint7",
+        "delta_yaw",
-    "finger_joint1",
+        "gripper",
-    "finger_joint2",
+    ]
 ]
 dofs_idx = [franka.get_joint(name).dof_idx_local for name in jnt_names]
-############ Optional: set control gains ############
+    def __init__(
-# set positional gains
+        self,
-franka.set_dofs_kp(
+        env_name: str = "Lift",
-    kp=np.array([4500, 4500, 3500, 3500, 2000, 2000, 2000, 100, 100]),
+        robot: str = "Panda",
-    dofs_idx_local=dofs_idx,
+        instruction: str = "pick up the red cube",
-)
+        model_name: str = "openvla/openvla-7b",
-# set velocity gains
+    ):
-franka.set_dofs_kv(
+        self.instruction = instruction
-    kv=np.array([450, 450, 350, 350, 200, 200, 200, 10, 10]),
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    dofs_idx_local=dofs_idx,
+        print(f"[INFO] Using device: {self.device}")
-)
+
-# set force range for safety
+        # Load OpenVLA model from HuggingFace
-franka.set_dofs_force_range(
+        print(f"[INFO] Loading OpenVLA model: {model_name}")
-    lower=np.array([-87, -87, -87, -87, -12, -12, -12, -100, -100]),
+        print("[INFO] This may take a while on first run...")
-    upper=np.array([87, 87, 87, 87, 12, 12, 12, 100, 100]),
+
-    dofs_idx_local=dofs_idx,
+        self.processor = AutoProcessor.from_pretrained(
-)
+            model_name, trust_remote_code=True
 # Hard reset
 for i in range(150):
    if i < 50:
        franka.set_dofs_position(np.array([1, 1, 0, 0, 0, 0, 0, 0.04, 0.04]), dofs_idx)
    elif i < 100:
        franka.set_dofs_position(
            np.array([-1, 0.8, 1, -2, 1, 0.5, -0.5, 0.04, 0.04]), dofs_idx
        )
    else:
        franka.set_dofs_position(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]), dofs_idx)
-    scene.step()
+        # Load model with appropriate settings
        self.model = AutoModelForVision2Seq.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        ).to(self.device)
-# PD control
+        self.model.eval()
-for i in range(1250):
+        print("[INFO] OpenVLA model loaded successfully!")
    if i == 0:
        franka.control_dofs_position(
            np.array([1, 1, 0, 0, 0, 0, 0, 0.04, 0.04]),
            dofs_idx,
        )
    elif i == 250:
        franka.control_dofs_position(
            np.array([-1, 0.8, 1, -2, 1, 0.5, -0.5, 0.04, 0.04]),
            dofs_idx,
        )
    elif i == 500:
        franka.control_dofs_position(
            np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
            dofs_idx,
        )
    elif i == 750:
        # control first dof with velocity, and the rest with position
        franka.control_dofs_position(
            np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])[1:],
            dofs_idx[1:],
        )
        franka.control_dofs_velocity(
            np.array([1.0, 0, 0, 0, 0, 0, 0, 0, 0])[:1],
            dofs_idx[:1],
        )
    elif i == 1000:
        franka.control_dofs_force(
            np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
            dofs_idx,
        )
    # This is the control force computed based on the given control command
    # If using force control, it's the same as the given control command
    print("control force:", franka.get_dofs_control_force(dofs_idx))
-    # This is the actual force experienced by the dof
+        # Create robosuite environment
-    print("internal force:", franka.get_dofs_force(dofs_idx))
+        print(f"[INFO] Creating {env_name} environment with {robot} robot...")
        self.env = suite.make(
            env_name=env_name,
            robots=robot,
            has_renderer=True,
            has_offscreen_renderer=True,
            use_camera_obs=True,
            camera_names="agentview",
            camera_heights=224,
            camera_widths=224,
            control_freq=20,
            render_camera="frontview",
        )
-    scene.step()
+        print(f"[INFO] Environment created! Action dim: {self.env.action_dim}")
    def get_prompt(self) -> str:
        """Return the text prompt sent to OpenVLA for the current instruction.

        The instruction stored on the instance is inserted into a fixed
        "In: ... Out:" question template.

        Returns:
            The formatted prompt string.
        """
        template = "In: What action should the robot take to {}?\nOut:"
        return template.format(self.instruction)
    @torch.no_grad()
    def get_action(self, obs: dict) -> np.ndarray:
        """Ask the OpenVLA model for an action given one observation.

        Args:
            obs: Observation mapping; only the ``"agentview_image"`` entry is
                read. The original author notes robosuite supplies it as an
                (H, W, C) uint8 array — TODO confirm against the env config.

        Returns:
            Whatever ``self.model.predict_action`` returns; the original
            author notes this is a numpy array of shape (7,).
        """
        # Convert the camera frame to a PIL image before handing it to the processor.
        frame = Image.fromarray(obs["agentview_image"])

        # Tokenize/encode prompt + image, then move the batch to the model's
        # device in bfloat16 (the dtype the model is loaded with in __init__).
        batch = self.processor(self.get_prompt(), frame)
        batch = batch.to(self.device, dtype=torch.bfloat16)

        # Greedy decoding (do_sample=False).
        # NOTE(review): upstream OpenVLA's `predict_action` accepts an
        # `unnorm_key` selecting de-normalization statistics; none is passed
        # here — confirm this checkpoint accepts the call without one.
        return self.model.predict_action(**batch, do_sample=False)
    def print_vla_output(self, step: int, action: np.ndarray, reward: float):
        """Write a human-readable report of one model action to stdout.

        Args:
            step: Step counter shown in the header, right-aligned to width 6.
            action: Action values, paired positionally with ``ACTION_NAMES``;
                pairing stops at the shorter of the two sequences.
            reward: Reward for the step, shown with four decimal places.
        """
        print("\n[Step {:6d}] OpenVLA Output:".format(step))
        print('  Instruction: "{}"'.format(self.instruction))
        print("  Actions:")
        for label, component in zip(self.ACTION_NAMES, action):
            # Name padded to 12 columns, value always signed with 4 decimals.
            print("    {:12s}: {:+.4f}".format(label, component))
        print("  Step Reward: {:.4f}".format(reward))
    def run(self):
        """Drive the environment with OpenVLA actions, episode after episode.

        Each episode resets the environment, then repeatedly renders, queries
        ``get_action``, fits the action to ``env.action_dim`` (zero-padding
        when too short, truncating when too long), and steps the environment
        until it reports ``done``. A report is printed every 20 steps of an
        episode and a summary at the end of each episode.

        The outer loop has no exit condition of its own, so this method only
        stops when an exception (e.g. ``KeyboardInterrupt``) propagates out.
        """
        rule = "=" * 60

        print("\n" + rule)
        print("OpenVLA Demo Started (Running Forever)")
        print(f'Instruction: "{self.instruction}"')
        print("Close the MuJoCo viewer window to quit")
        print(rule + "\n")

        total_steps = 0
        episode = 0

        # Episodes repeat indefinitely.
        while True:
            episode += 1
            print("\n" + rule)
            print(f"Episode {episode} Started")
            print(rule)

            obs = self.env.reset()
            episode_steps = 0
            episode_reward = 0.0
            done = False

            while not done:
                # Show the current state in the viewer before acting.
                self.env.render()

                action = self.get_action(obs)

                # Fit the model output to the controller's action size:
                # zero-pad when too short, drop the tail when too long.
                target_dim = self.env.action_dim
                missing = target_dim - len(action)
                if missing > 0:
                    action = np.concatenate([action, np.zeros(missing)])
                elif missing < 0:
                    action = action[:target_dim]

                obs, reward, done, info = self.env.step(action)

                total_steps += 1
                episode_steps += 1
                episode_reward += reward

                # Report every 20th step of the episode; the header shows
                # the global step count.
                if episode_steps % 20 == 0:
                    self.print_vla_output(total_steps, action, reward)

            print(f"\n[INFO] Episode {episode} done at step {episode_steps}")
            print(f"[INFO] Episode Reward: {episode_reward:.3f}")

            print(f"\nEpisode {episode} Summary:")
            print(f"  Steps: {episode_steps}")
            print(f"  Total Reward: {episode_reward:.3f}")
            print(f"  Total Steps (all episodes): {total_steps}")
    def close(self):
        """Shut down the environment held by this demo."""
        environment = self.env
        environment.close()
 def main():
    """Print a banner, build the demo with its fixed settings, and run it.

    The demo is always closed on the way out; a ``KeyboardInterrupt`` raised
    while running is reported and swallowed rather than propagated.
    """
    rule = "=" * 60
    banner = (
        rule,
        "OpenVLA Demo with Robosuite + MuJoCo",
        "Using OpenVLA-7B from HuggingFace",
        rule,
    )
    for line in banner:
        print(line)

    settings = {
        "env_name": "Lift",
        "robot": "Panda",
        "instruction": "pick up the red cube",
    }
    demo = OpenVLADemo(**settings)

    try:
        demo.run()
    except KeyboardInterrupt:
        print("\n[INFO] Interrupted by user")
    finally:
        # Release the environment whether the run ended by interrupt or error.
        demo.close()
        print("[INFO] Demo closed")
 # Script entry point: start the demo only when this file is executed directly.
 if __name__ == "__main__":
    main()
--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
SikongJueluo	48bb2d3258	refactor: use mujoco to replace genesis	2025-12-08 16:44:16 +08:00
SikongJueluo	7c09706628	build: make environment adapt to Nixos	2025-12-08 16:04:06 +08:00
SikongJueluo	3bc68ced4c	build: deps and environment change	2025-12-08 15:38:00 +08:00