Compare commits

...

4 Commits

6 changed files with 766 additions and 1277 deletions

.gitignore vendored

@@ -216,3 +216,14 @@ devenv.local.yaml
 
 # pre-commit
 .pre-commit-config.yaml
+
+# Devenv
+.devenv*
+devenv.local.nix
+devenv.local.yaml
+
+# direnv
+.direnv
+
+# pre-commit
+.pre-commit-config.yaml

devenv.lock

@@ -3,10 +3,10 @@
     "devenv": {
       "locked": {
         "dir": "src/modules",
-        "lastModified": 1764449550,
+        "lastModified": 1764927628,
         "owner": "cachix",
         "repo": "devenv",
-        "rev": "dfb58ac03bed07b93f629df55034bc50394d3971",
+        "rev": "247d7027f91368054fb0eefbd755a73d42b66fee",
         "type": "github"
       },
       "original": {
@@ -19,10 +19,10 @@
     "flake-compat": {
       "flake": false,
       "locked": {
-        "lastModified": 1761588595,
+        "lastModified": 1765121682,
         "owner": "edolstra",
         "repo": "flake-compat",
-        "rev": "f387cd2afec9419c8ee37694406ca490c3f34ee5",
+        "rev": "65f23138d8d09a92e30f1e5c87611b23ef451bf3",
         "type": "github"
       },
       "original": {
@@ -34,10 +34,10 @@
     "flake-compat_2": {
       "flake": false,
       "locked": {
-        "lastModified": 1761588595,
+        "lastModified": 1765121682,
         "owner": "edolstra",
         "repo": "flake-compat",
-        "rev": "f387cd2afec9419c8ee37694406ca490c3f34ee5",
+        "rev": "65f23138d8d09a92e30f1e5c87611b23ef451bf3",
         "type": "github"
       },
       "original": {
@@ -55,10 +55,10 @@
         ]
       },
       "locked": {
-        "lastModified": 1763988335,
+        "lastModified": 1765016596,
         "owner": "cachix",
         "repo": "git-hooks.nix",
-        "rev": "50b9238891e388c9fdc6a5c49e49c42533a1b5ce",
+        "rev": "548fc44fca28a5e81c5d6b846e555e6b9c2a5a3c",
         "type": "github"
       },
       "original": {
@@ -89,10 +89,10 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1761313199,
+        "lastModified": 1764580874,
         "owner": "cachix",
         "repo": "devenv-nixpkgs",
-        "rev": "d1c30452ebecfc55185ae6d1c983c09da0c274ff",
+        "rev": "dcf61356c3ab25f1362b4a4428a6d871e84f1d1d",
         "type": "github"
       },
       "original": {
@@ -108,10 +108,10 @@
         "nixpkgs": "nixpkgs_2"
       },
       "locked": {
-        "lastModified": 1763677049,
+        "lastModified": 1765052656,
         "owner": "cachix",
         "repo": "nixpkgs-python",
-        "rev": "159d63dc49a4b12bf85fe0e83011a8b69ba1bcb0",
+        "rev": "04b27dbad2e004cb237db202f21154eea3c4f89f",
         "type": "github"
       },
       "original": {
@@ -122,10 +122,10 @@
     },
     "nixpkgs_2": {
      "locked": {
-        "lastModified": 1764316264,
+        "lastModified": 1764939437,
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "9a7b80b6f82a71ea04270d7ba11b48855681c4b0",
+        "rev": "00d2457e2f608b4be6fe8b470b0a36816324b0ae",
         "type": "github"
       },
       "original": {

devenv.nix

@@ -6,8 +6,8 @@
   ...
 }: {
   packages = with pkgs; [
-    glxinfo
-    vulkan-tools
+    mujoco
+    linuxHeaders
   ];
 
   languages.c.enable = true;
@@ -55,30 +55,33 @@
     );
   };
 
-  env = rec {
-    NIX_LD_LIBRARY_PATH = "$NIX_LD_LIBRARY_PATH:/usr/lib/wsl/lib/";
-    LD_LIBRARY_PATH = NIX_LD_LIBRARY_PATH;
-    GLVND = "${pkgs.libglvnd}";
-    MESA = "${pkgs.mesa}";
-    MESAD = MESA;
-    GALLIUM_DRIVER = "d3d12";
-    MESA_D3D12_DEFAULT_ADAPTER_NAME = "NVIDIA";
-    # LIBGL_ALWAYS_INDIRECT = 0;
-    # __GLX_VENDOR_LIBRARY_NAME = "nvidia";
-    # __NV_PRIME_RENDER_OFFLOAD = 1;
-    # __VK_LAYER_NV_optimus = "NVIDIA_only";
-    # PYOPENGL_PLATFORM = "egl";
-    # EGL_PLATFORM = "wayland";
-    # __EGL_VENDOR_LIBRARY_DIRS = "${MESA}/share/glvnd/egl_vendor.d:${GLVND}/share/glvnd/egl_vendor.d";
-    # LIBGL_DRIVERS_PATH = "${MESA}/lib/dri";
+  env = {
+    CPATH = "${pkgs.linuxHeaders}/include";
+    KERNEL_DIR = "${pkgs.linuxHeaders}/include";
   };
 
+  # Create a .venv symlink pointing at .devenv/state/venv
+  scripts.linkVenv.exec = ''
+    if [ -L ".venv" ] || [ ! -e ".venv" ]; then
+      rm -f ".venv" 2>/dev/null || true
+      ln -s ".devenv/state/venv" ".venv"
+      echo "Created symlink: .venv -> .devenv/state/venv"
+    elif [ -e ".venv" ]; then
+      echo "Warning: .venv exists and is not a symlink. Not replacing it."
+      echo "If you want to replace it with a symlink, please remove it manually first."
+    fi
+  '';
+
   # https://devenv.sh/basics/
   enterShell = ''
-    git --version # Use packages
+    export NIX_LD_LIBRARY_PATH="$NIX_LD_LIBRARY_PATH:${lib.makeLibraryPath [ pkgs.linuxHeaders ]}";
+    export LD_LIBRARY_PATH="$NIX_LD_LIBRARY_PATH";
+    echo "NIX_LD_LIBRARY_PATH set to $NIX_LD_LIBRARY_PATH"
+    echo "LD_LIBRARY_PATH set to $LD_LIBRARY_PATH"
+    echo "CPATH set to $CPATH"
+    echo "KERNEL_DIR set to $KERNEL_DIR"
+    linkVenv
   '';
 }
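
The new scripts.linkVenv guard only touches .venv when it is a symlink or missing, so a real directory a user created by hand is never clobbered. For readers tracing that logic, here is the same check sketched in Python with pathlib; link_venv is a hypothetical helper for illustration, not something this commit adds:

# Sketch of the linkVenv guard: replace .venv only when it is a symlink
# or absent, never a real file or directory someone created by hand.
from pathlib import Path

def link_venv(target: str = ".devenv/state/venv") -> None:
    venv = Path(".venv")
    if venv.is_symlink() or not venv.exists():
        venv.unlink(missing_ok=True)  # drop a stale symlink if present
        venv.symlink_to(target)
        print(f"Created symlink: .venv -> {target}")
    else:
        print("Warning: .venv exists and is not a symlink. Not replacing it.")

if __name__ == "__main__":
    link_venv()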

pyproject.toml

@@ -5,8 +5,15 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "accelerate>=1.12.0",
+    "mujoco>=3.4.0",
+    "pillow>=12.0.0",
+    "robosuite>=1.5.1",
+    "robosuite-models>=1.0.0",
+    "timm>=0.9.10,<1.0.0",
     "torch>=2.9.1",
     "torchvision>=0.24.1",
+    "transformers==4.40.1",
 ]
 
 [dependency-groups]
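
Note that transformers is pinned to exactly 4.40.1 while timm is capped below 1.0.0; since the demo loads the model with trust_remote_code, drifting away from these versions is a common source of breakage. A small startup check can catch a stale environment early; a minimal sketch using only the standard importlib.metadata API:

# Sanity-check the pinned model stack before loading OpenVLA.
from importlib.metadata import version

assert version("transformers") == "4.40.1", (
    f"transformers=={version('transformers')}, expected the 4.40.1 pin"
)
assert int(version("timm").split(".")[0]) < 1, (
    f"timm {version('timm')} is too new; pyproject caps it below 1.0.0"
)
print("Model stack matches the pyproject pins.")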


@@ -1,126 +1,196 @@
-from typing import cast
-
-import genesis as gs
-import numpy as np
-
-########################## init ##########################
-gs.init(backend=gs.gs_backend.gpu)
-
-try:
-    from genesis.engine.entities import RigidEntity
-except ImportError:
-    raise ImportError("genesis.engine.entities.RigidEntity is not available")
-
-########################## create a scene ##########################
-scene = gs.Scene(
-    viewer_options=gs.options.ViewerOptions(
-        camera_pos=(0, -3.5, 2.5),
-        camera_lookat=(0.0, 0.0, 0.5),
-        camera_fov=30,
-        res=(960, 640),
-        max_FPS=60,
-    ),
-    sim_options=gs.options.SimOptions(
-        dt=0.01,
-    ),
-    show_viewer=True,
-)
-
-########################## entities ##########################
-plane = scene.add_entity(
-    gs.morphs.Plane(),
-)
-franka = scene.add_entity(
-    gs.morphs.MJCF(
-        file="xml/franka_emika_panda/panda.xml",
-    ),
-)
-franka = cast(RigidEntity, franka)
-
-########################## build ##########################
-scene.build()
-
-jnt_names = [
-    "joint1",
-    "joint2",
-    "joint3",
-    "joint4",
-    "joint5",
-    "joint6",
-    "joint7",
-    "finger_joint1",
-    "finger_joint2",
-]
-dofs_idx = [franka.get_joint(name).dof_idx_local for name in jnt_names]
-
-############ Optional: set control gains ############
-# set positional gains
-franka.set_dofs_kp(
-    kp=np.array([4500, 4500, 3500, 3500, 2000, 2000, 2000, 100, 100]),
-    dofs_idx_local=dofs_idx,
-)
-# set velocity gains
-franka.set_dofs_kv(
-    kv=np.array([450, 450, 350, 350, 200, 200, 200, 10, 10]),
-    dofs_idx_local=dofs_idx,
-)
-# set force range for safety
-franka.set_dofs_force_range(
-    lower=np.array([-87, -87, -87, -87, -12, -12, -12, -100, -100]),
-    upper=np.array([87, 87, 87, 87, 12, 12, 12, 100, 100]),
-    dofs_idx_local=dofs_idx,
-)
-
-# Hard reset
-for i in range(150):
-    if i < 50:
-        franka.set_dofs_position(np.array([1, 1, 0, 0, 0, 0, 0, 0.04, 0.04]), dofs_idx)
-    elif i < 100:
-        franka.set_dofs_position(
-            np.array([-1, 0.8, 1, -2, 1, 0.5, -0.5, 0.04, 0.04]), dofs_idx
-        )
-    else:
-        franka.set_dofs_position(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]), dofs_idx)
-    scene.step()
-
-# PD control
-for i in range(1250):
-    if i == 0:
-        franka.control_dofs_position(
-            np.array([1, 1, 0, 0, 0, 0, 0, 0.04, 0.04]),
-            dofs_idx,
-        )
-    elif i == 250:
-        franka.control_dofs_position(
-            np.array([-1, 0.8, 1, -2, 1, 0.5, -0.5, 0.04, 0.04]),
-            dofs_idx,
-        )
-    elif i == 500:
-        franka.control_dofs_position(
-            np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
-            dofs_idx,
-        )
-    elif i == 750:
-        # control first dof with velocity, and the rest with position
-        franka.control_dofs_position(
-            np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])[1:],
-            dofs_idx[1:],
-        )
-        franka.control_dofs_velocity(
-            np.array([1.0, 0, 0, 0, 0, 0, 0, 0, 0])[:1],
-            dofs_idx[:1],
-        )
-    elif i == 1000:
-        franka.control_dofs_force(
-            np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
-            dofs_idx,
-        )
-    # This is the control force computed based on the given control command
-    # If using force control, it's the same as the given control command
-    print("control force:", franka.get_dofs_control_force(dofs_idx))
-    # This is the actual force experienced by the dof
-    print("internal force:", franka.get_dofs_force(dofs_idx))
-    scene.step()
+"""
+Simple OpenVLA Demo with Robosuite + MuJoCo
+Uses the official OpenVLA model from HuggingFace
+"""
+
+import numpy as np
+import robosuite as suite
+import torch
+from PIL import Image
+from transformers import AutoModelForVision2Seq, AutoProcessor
+
+
+class OpenVLADemo:
+    """OpenVLA Demo with Robosuite using HuggingFace model"""
+
+    # OpenVLA action names (7-DoF)
+    ACTION_NAMES = [
+        "delta_x",
+        "delta_y",
+        "delta_z",
+        "delta_roll",
+        "delta_pitch",
+        "delta_yaw",
+        "gripper",
+    ]
+
+    def __init__(
+        self,
+        env_name: str = "Lift",
+        robot: str = "Panda",
+        instruction: str = "pick up the red cube",
+        model_name: str = "openvla/openvla-7b",
+    ):
+        self.instruction = instruction
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"[INFO] Using device: {self.device}")
+
+        # Load OpenVLA model from HuggingFace
+        print(f"[INFO] Loading OpenVLA model: {model_name}")
+        print("[INFO] This may take a while on first run...")
+        self.processor = AutoProcessor.from_pretrained(
+            model_name, trust_remote_code=True
+        )
+
+        # Load model with appropriate settings
+        self.model = AutoModelForVision2Seq.from_pretrained(
+            model_name,
+            torch_dtype=torch.bfloat16,
+            low_cpu_mem_usage=True,
+            trust_remote_code=True,
+        ).to(self.device)
+        self.model.eval()
+        print("[INFO] OpenVLA model loaded successfully!")
+
+        # Create robosuite environment
+        print(f"[INFO] Creating {env_name} environment with {robot} robot...")
+        self.env = suite.make(
+            env_name=env_name,
+            robots=robot,
+            has_renderer=True,
+            has_offscreen_renderer=True,
+            use_camera_obs=True,
+            camera_names="agentview",
+            camera_heights=224,
+            camera_widths=224,
+            control_freq=20,
+            render_camera="frontview",
+        )
+        print(f"[INFO] Environment created! Action dim: {self.env.action_dim}")
+
+    def get_prompt(self) -> str:
+        """Format the prompt for OpenVLA"""
+        return f"In: What action should the robot take to {self.instruction}?\nOut:"
+
+    @torch.no_grad()
+    def get_action(self, obs: dict) -> np.ndarray:
+        """Get action from OpenVLA model"""
+        # Get image from observation and convert to PIL
+        image_array = obs["agentview_image"]
+        # Robosuite returns (H, W, C) uint8 image
+        image = Image.fromarray(image_array)
+
+        # Prepare prompt
+        prompt = self.get_prompt()
+
+        # Process inputs
+        inputs = self.processor(prompt, image).to(self.device, dtype=torch.bfloat16)
+
+        # Predict action using OpenVLA
+        # Note: unnorm_key should match your robot setup
+        # For simulation, we'll use raw normalized actions
+        action = self.model.predict_action(
+            **inputs, do_sample=False, unnorm_key="roboturk"
+        )
+
+        # Action is numpy array of shape (7,)
+        return action
+
+    def print_vla_output(self, step: int, action: np.ndarray, reward: float):
+        """Print detailed VLA output to terminal"""
+        print(f"\n[Step {step:6d}] OpenVLA Output:")
+        print(f'  Instruction: "{self.instruction}"')
+        print("  Actions:")
+        for name, value in zip(self.ACTION_NAMES, action):
+            print(f"    {name:12s}: {value:+.4f}")
+        print(f"  Step Reward: {reward:.4f}")
+
+    def run(self):
+        """Run the demo forever"""
+        print("\n" + "=" * 60)
+        print("OpenVLA Demo Started (Running Forever)")
+        print(f'Instruction: "{self.instruction}"')
+        print("Close the MuJoCo viewer window to quit")
+        print("=" * 60 + "\n")
+
+        episode = 0
+        total_steps = 0
+
+        # Run forever
+        while True:
+            episode += 1
+            print(f"\n{'=' * 60}")
+            print(f"Episode {episode} Started")
+            print("=" * 60)
+
+            obs = self.env.reset()
+            episode_reward = 0.0
+            episode_steps = 0
+
+            while True:
+                # Render environment with MuJoCo viewer
+                self.env.render()
+
+                # Get action from OpenVLA model
+                action = self.get_action(obs)
+
+                # Robosuite expects action of env.action_dim size
+                # OpenVLA outputs 7-DoF, pad or truncate if needed
+                if len(action) < self.env.action_dim:
+                    action = np.concatenate(
+                        [action, np.zeros(self.env.action_dim - len(action))]
+                    )
+                elif len(action) > self.env.action_dim:
+                    action = action[: self.env.action_dim]
+
+                # Step environment
+                obs, reward, done, info = self.env.step(action)
+                episode_reward += reward
+                episode_steps += 1
+                total_steps += 1
+
+                # Print VLA output every 20 steps
+                if episode_steps % 20 == 0:
+                    self.print_vla_output(total_steps, action, reward)
+
+                if done:
+                    print(f"\n[INFO] Episode {episode} done at step {episode_steps}")
+                    print(f"[INFO] Episode Reward: {episode_reward:.3f}")
+                    break
+
+            print(f"\nEpisode {episode} Summary:")
+            print(f"  Steps: {episode_steps}")
+            print(f"  Total Reward: {episode_reward:.3f}")
+            print(f"  Total Steps (all episodes): {total_steps}")
+
+    def close(self):
+        """Clean up"""
+        self.env.close()
+
+
+def main():
+    print("=" * 60)
+    print("OpenVLA Demo with Robosuite + MuJoCo")
+    print("Using OpenVLA-7B from HuggingFace")
+    print("=" * 60)
+
+    demo = OpenVLADemo(
+        env_name="Lift",
+        robot="Panda",
+        instruction="pick up the red cube",
+    )
+
+    try:
+        demo.run()
+    except KeyboardInterrupt:
+        print("\n[INFO] Interrupted by user")
+    finally:
+        demo.close()
+        print("[INFO] Demo closed")
+
+
+if __name__ == "__main__":
+    main()
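
Before launching the full robosuite loop, it can be worth smoke-testing just the model half of the new script. The sketch below mirrors the demo's own loading and predict_action calls on a blank frame; the CUDA device, bfloat16 dtype, and "roboturk" unnorm_key are the demo's assumptions, not general recommendations:

# Single OpenVLA prediction on a dummy frame (no simulator needed).
import numpy as np
import torch
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor

MODEL = "openvla/openvla-7b"  # 7B parameters; needs a large GPU even in bfloat16
processor = AutoProcessor.from_pretrained(MODEL, trust_remote_code=True)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, trust_remote_code=True
).to("cuda")
model.eval()

# A black 224x224 frame stands in for the robosuite agentview camera.
image = Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8))
prompt = "In: What action should the robot take to pick up the red cube?\nOut:"

inputs = processor(prompt, image).to("cuda", dtype=torch.bfloat16)
with torch.no_grad():
    action = model.predict_action(**inputs, do_sample=False, unnorm_key="roboturk")
print("7-DoF action:", action)  # [dx, dy, dz, droll, dpitch, dyaw, gripper]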

uv.lock generated

File diff suppressed because it is too large.