refactor(compressors): Simplify module by removing SAM/DINO separation code

- Remove dino_compressor.py and segament_compressor.py - Rewrite pipeline.py to inline DINO into HashPipeline - Maintain backward compatibility: SAMHashPipeline alias - Update tests and benchmark.py
2026-07-12 20:15:31 +08:00 · 2026-03-07 21:33:42 +08:00
parent c8dc5f9301
commit 4da08dc3d3
8 changed files with 276 additions and 490 deletions
--- a/mini-nav/commands/benchmark.py
+++ b/mini-nav/commands/benchmark.py
@@ -1,4 +1,4 @@
-from typing import cast
+from typing import Any, Optional, cast

 import typer
 from commands import app
@@ -7,15 +7,15 @@ from commands import app
@app.command()
 def benchmark(
    ctx: typer.Context,
-    model_path: str = typer.Option(
+    model_path: Optional[str] = typer.Option(
        None, "--model", "-m", help="Path to compressor model weights"
    ),
 ):
    import torch
+    import torch.nn.functional as F
    from benchmarks import run_benchmark
-    from compressors import DinoCompressor
    from configs import cfg_manager
-    from transformers import AutoImageProcessor, BitImageProcessorFast
+    from transformers import AutoImageProcessor, AutoModel, BitImageProcessorFast
    from utils import get_device

    config = cfg_manager.get()
@@ -29,7 +29,12 @@ def benchmark(
        AutoImageProcessor.from_pretrained(model_cfg.dino_model, device_map=device),
    )

-    model = DinoCompressor().to(device)
+    # Load DINO model for feature extraction
+    dino = AutoModel.from_pretrained(model_cfg.dino_model, device_map=device)
+    dino.eval()
+
+    # Optional hash compressor
+    compressor = None
    if model_path:
        from compressors import HashCompressor

@@ -38,7 +43,31 @@ def benchmark(
            hash_bits=model_cfg.compression_dim,
        )
        compressor.load_state_dict(torch.load(model_path))
-        model.compressor = compressor
+        compressor.to(device)
+        compressor.eval()
+
+    # Create wrapper with extract_features method
+    class DinoFeatureExtractor:
+        def __init__(self, dino, compressor=None):
+            self.dino = dino
+            self.compressor = compressor
+
+        def extract_features(self, images: list) -> torch.Tensor:
+            inputs = processor(images, return_tensors="pt").to(device)
+            with torch.no_grad():
+                outputs = self.dino(**inputs)
+                features = outputs.last_hidden_state.mean(dim=1)
+                features = F.normalize(features, dim=-1)
+            return features
+
+        def encode(self, images: list) -> torch.Tensor:
+            if self.compressor is None:
+                return self.extract_features(images)
+            tokens = self.dino(**processor(images, return_tensors="pt").to(device)).last_hidden_state
+            _, _, bits = self.compressor(tokens)
+            return bits
+
+    model = DinoFeatureExtractor(dino, compressor)

    run_benchmark(
        model=model,
--- a/mini-nav/compressors/init.py
+++ b/mini-nav/compressors/init.py
@@ -1,18 +1,15 @@
 from .common import BinarySign, bits_to_hash, hamming_distance, hamming_similarity, hash_to_bits
-from .dino_compressor import DinoCompressor
 from .hash_compressor import HashCompressor, HashLoss, VideoPositiveMask
-from .pipeline import SAMHashPipeline, create_pipeline_from_config
-from .segament_compressor import SegmentCompressor
+from .pipeline import HashPipeline, SAMHashPipeline, create_pipeline_from_config
 from .train import train

 __all__ = [
    "train",
-    "DinoCompressor",
    "HashCompressor",
    "HashLoss",
    "VideoPositiveMask",
-    "SegmentCompressor",
-    "SAMHashPipeline",
+    "HashPipeline",
+    "SAMHashPipeline",  # Backward compatibility alias
    "create_pipeline_from_config",
    "BinarySign",
    "hamming_distance",
--- a/mini-nav/compressors/dino_compressor.py
+++ b/mini-nav/compressors/dino_compressor.py
@@ -1,105 +0,0 @@
-from typing import Optional
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from PIL import Image
-from transformers import AutoImageProcessor, AutoModel
-
-
-class DinoCompressor(nn.Module):
-    """DINOv2 feature extractor with optional hash compression.
-
-    When compressor is None: returns normalized DINO embeddings.
-    When compressor is provided: returns binary hash bits for CAM storage.
-
-    Supports both PIL Image input and pre-extracted tokens.
-    """
-
-    def __init__(
-        self,
-        model_name: str = "facebook/dinov2-large",
-        compressor: Optional[nn.Module] = None,
-        device: Optional[str] = None,
-    ):
-        """Initialize DINOv2 extractor.
-
-        Args:
-            model_name: HuggingFace model name
-            compressor: Optional hash compressor for producing binary codes
-            device: Device to load model on
-        """
-        super().__init__()
-
-        # Auto detect device
-        if device is None:
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.device = torch.device(device)
-
-        self.model_name = model_name
-        self.processor = AutoImageProcessor.from_pretrained(model_name)
-        self.dino = AutoModel.from_pretrained(model_name).to(self.device)
-        self.dino.eval()
-
-        self.compressor = compressor
-
-    def forward(self, inputs):
-        teacher_tokens = self.dino(**inputs).last_hidden_state  # [B,N,1024]
-
-        teacher_embed = teacher_tokens.mean(dim=1)
-        teacher_embed = F.normalize(teacher_embed, dim=-1)  # [B,1024]
-
-        if self.compressor is None:
-            return teacher_embed
-
-        # HashCompressor returns (logits, hash_codes, bits)
-        _, _, bits = self.compressor(teacher_tokens)
-        return bits  # [B, 512] binary bits for CAM
-
-    def extract_features(self, images: list[Image.Image]) -> torch.Tensor:
-        """Extract DINO features from a list of cropped object images.
-
-        Args:
-            images: List of PIL Images (cropped objects)
-
-        Returns:
-            DINO features [N, feature_dim], normalized
-        """
-        if len(images) == 0:
-            return torch.empty(0, self.dino.config.hidden_size, device=self.device)
-
-        # Process batch of images
-        inputs = self.processor(images, return_tensors="pt").to(self.device)
-
-        with torch.no_grad():
-            outputs = self.dino(**inputs)
-
-        # Pool tokens to get global representation
-        features = outputs.last_hidden_state.mean(dim=1)  # [N, 1024]
-        features = F.normalize(features, dim=-1)
-
-        return features
-
-    def encode(self, images: list[Image.Image]) -> torch.Tensor:
-        """Extract features from images and optionally compress to hash codes.
-
-        Args:
-            images: List of PIL Images
-
-        Returns:
-            If compressor is None: DINO features [N, 1024]
-            If compressor is set: Binary hash bits [N, 512]
-        """
-        if self.compressor is None:
-            return self.extract_features(images)
-
-        # Extract features first
-        features = self.extract_features(images)  # [N, 1024]
-
-        # Add sequence dimension for compressor (expects [B, N, dim])
-        features = features.unsqueeze(1)  # [N, 1, 1024]
-
-        # Compress to hash codes
-        _, _, bits = self.compressor(features)
-
-        return bits
--- a/mini-nav/compressors/pipeline.py
+++ b/mini-nav/compressors/pipeline.py
@@ -1,78 +1,65 @@
-"""Complete pipeline for SAM + DINO + HashCompressor.
+"""Hash compression pipeline with DINO feature extraction.

-This pipeline extracts object masks from images using SAM2.1,
-crops the objects, extracts features using DINOv2,
-and compresses them to binary hash codes using HashCompressor.
+This pipeline extracts features using DINOv2 and compresses them
+to binary hash codes using HashCompressor.
 """

 from typing import Optional

 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 from PIL import Image
-
-from .dino_compressor import DinoCompressor
-from .hash_compressor import HashCompressor
-from .segament_compressor import SegmentCompressor
+from transformers import AutoImageProcessor, AutoModel


-def create_pipeline_from_config(config) -> "SAMHashPipeline":
-    """Create SAMHashPipeline from a config object.
+def create_pipeline_from_config(config) -> "HashPipeline":
+    """Create HashPipeline from a config object.

    Args:
        config: Configuration object with model settings

    Returns:
-        Initialized SAMHashPipeline
+        Initialized HashPipeline
    """
-    return SAMHashPipeline(
-        sam_model=config.model.sam_model,
-        dino_model=config.model.name,
+    return HashPipeline(
+        dino_model=config.model.dino_model,
        hash_bits=config.model.compression_dim,
-        sam_min_mask_area=config.model.sam_min_mask_area,
-        sam_max_masks=config.model.sam_max_masks,
        compressor_path=config.model.compressor_path,
        device=config.model.device if config.model.device != "auto" else None,
    )


-class SAMHashPipeline(nn.Module):
-    """Complete pipeline: SAM segmentation + DINO features + Hash compression.
+class HashPipeline(nn.Module):
+    """Pipeline: DINO features + Hash compression.

    Pipeline flow:
-        Image -> SAM (extract masks) -> Crop objects -> DINO (features) -> Hash (binary codes)
+        PIL Image -> DINO (features) -> Hash (binary codes)

    Usage:
        # Initialize with config
-        pipeline = SAMHashPipeline(
-            sam_model="facebook/sam2.1-hiera-large",
+        pipeline = HashPipeline(
            dino_model="facebook/dinov2-large",
            hash_bits=512,
        )

        # Process image
        image = Image.open("path/to/image.jpg")
-        hash_codes = pipeline(image)  # [N, 512] binary bits
+        hash_bits = pipeline(image)  # [1, 512] binary bits
    """

    def __init__(
        self,
-        sam_model: str = "facebook/sam2.1-hiera-large",
        dino_model: str = "facebook/dinov2-large",
        hash_bits: int = 512,
-        sam_min_mask_area: int = 100,
-        sam_max_masks: int = 10,
        compressor_path: Optional[str] = None,
        device: Optional[str] = None,
    ):
-        """Initialize the complete pipeline.
+        """Initialize the pipeline.

        Args:
-            sam_model: SAM model name from HuggingFace
            dino_model: DINOv2 model name from HuggingFace
            hash_bits: Number of bits in hash code
-            sam_min_mask_area: Minimum mask area threshold
-            sam_max_masks: Maximum number of masks to keep
            compressor_path: Optional path to trained HashCompressor weights
            device: Device to run models on
        """
@@ -83,87 +70,101 @@ class SAMHashPipeline(nn.Module):
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device)

-        # Initialize components
-        self.segmentor = SegmentCompressor(
-            model_name=sam_model,
-            min_mask_area=sam_min_mask_area,
-            max_masks=sam_max_masks,
-            device=device,
-        )
+        self.dino_model = dino_model

-        # HashCompressor expects DINO features (1024 dim for dinov2-large)
-        dino_dim = 1024 if "large" in dino_model else 768
-        self.hash_compressor = HashCompressor(
-            input_dim=dino_dim, hash_bits=hash_bits
-        ).to(device)
+        # Initialize DINO processor and model
+        self.processor = AutoImageProcessor.from_pretrained(dino_model)
+        self.dino = AutoModel.from_pretrained(dino_model).to(self.device)
+        self.dino.eval()
+
+        # Determine DINO feature dimension
+        self.dino_dim = 1024 if "large" in dino_model else 768
+
+        # Initialize HashCompressor
+        self.hash_compressor = nn.Module()  # Placeholder, will be replaced
+        self._init_hash_compressor(hash_bits, compressor_path)
+
+    def _init_hash_compressor(
+        self, hash_bits: int, compressor_path: Optional[str] = None
+    ):
+        """Initialize the hash compressor module.
+
+        This is called during __init__ but we need to replace it properly.
+        """
+        # Import here to avoid circular imports
+        from .hash_compressor import HashCompressor
+
+        compressor = HashCompressor(input_dim=self.dino_dim, hash_bits=hash_bits).to(
+            self.device
+        )

        # Load pretrained compressor if provided
        if compressor_path is not None:
-            self.hash_compressor.load_state_dict(
-                torch.load(compressor_path, map_location=device)
+            compressor.load_state_dict(
+                torch.load(compressor_path, map_location=self.device)
            )
            print(f"[OK] Loaded HashCompressor from {compressor_path}")

-        self.dino = DinoCompressor(
-            model_name=dino_model,
-            compressor=self.hash_compressor,
-            device=device,
-        )
+        # Replace the placeholder
+        self.hash_compressor = compressor
+
+    @property
+    def hash_bits(self):
+        """Return the number of hash bits."""
+        return self.hash_compressor.hash_bits

    def forward(self, image: Image.Image) -> torch.Tensor:
-        """Process a single image through the complete pipeline.
+        """Process a single image through the pipeline.

        Args:
            image: Input PIL Image

        Returns:
-            Binary hash codes [N, hash_bits] where N is number of detected objects
+            Binary hash codes [1, hash_bits] as int32
        """
-        # Step 1: SAM - extract and crop objects
-        cropped_objects = self.segmentor(image)
+        # Extract DINO features
+        inputs = self.processor(image, return_tensors="pt").to(self.device)

-        if len(cropped_objects) == 0:
-            # No objects detected, return empty tensor
-            return torch.empty(
-                0, self.hash_compressor.hash_bits, dtype=torch.int32, device=self.device
-            )
+        with torch.no_grad():
+            outputs = self.dino(**inputs)
+            tokens = outputs.last_hidden_state  # [1, N, dim]

-        # Step 2: DINO - extract features from cropped objects
-        # Step 3: HashCompressor - compress features to binary codes
-        hash_codes = self.dino.encode(cropped_objects)
+        # Compress to hash codes
+        _, _, bits = self.hash_compressor(tokens)

-        return hash_codes
+        return bits

-    def extract_features(
-        self, image: Image.Image, use_hash: bool = False
-    ) -> torch.Tensor:
-        """Extract features from image with optional hash compression.
+    def encode(self, image: Image.Image) -> torch.Tensor:
+        """Encode an image to binary hash bits.

-        Args:
-            image: Input PIL Image
-            use_hash: If True, return binary hash codes; else return DINO features
-
-        Returns:
-            Features [N, dim] where dim is 1024 (DINO) or 512 (hash)
-        """
-        cropped_objects = self.segmentor(image)
-
-        if len(cropped_objects) == 0:
-            dim = self.hash_compressor.hash_bits if use_hash else 1024
-            return torch.empty(0, dim, device=self.device)
-
-        if use_hash:
-            return self.dino.encode(cropped_objects)
-        else:
-            return self.dino.extract_features(cropped_objects)
-
-    def extract_masks(self, image: Image.Image) -> list[torch.Tensor]:
-        """Extract only masks without full processing (for debugging).
+        Alias for forward().

        Args:
            image: Input PIL Image

        Returns:
-            List of binary masks [H, W]
+            Binary hash codes [1, hash_bits] as int32
        """
-        return self.segmentor.extract_masks(image)
+        return self.forward(image)
+
+    def extract_features(self, image: Image.Image) -> torch.Tensor:
+        """Extract DINO features from an image.
+
+        Args:
+            image: Input PIL Image
+
+        Returns:
+            DINO features [1, dino_dim], normalized
+        """
+        inputs = self.processor(image, return_tensors="pt").to(self.device)
+
+        with torch.no_grad():
+            outputs = self.dino(**inputs)
+            features = outputs.last_hidden_state.mean(dim=1)  # [1, dim]
+            features = F.normalize(features, dim=-1)
+
+        return features
+
+
+# Backward compatibility alias
+SAMHashPipeline = HashPipeline
--- a/mini-nav/compressors/segament_compressor.py
+++ b/mini-nav/compressors/segament_compressor.py
@@ -1,180 +0,0 @@
-"""Segment Anything 2 feature extractor with mask filtering and image cropping.
-
-Extracts object masks from images using SAM2.1, filters by area and confidence,
-then crops the original image to obtain individual object regions.
-"""
-
-from typing import Optional
-
-import numpy as np
-import torch
-import torch.nn as nn
-from PIL import Image
-from transformers import AutoModelForMaskGeneration, AutoProcessor
-
-
-class SegmentCompressor(nn.Module):
-    """SAM2.1 based segmenter with mask filtering.
-
-    Extracts object masks from images, filters by area and confidence,
-    and crops the original image to produce individual object patches.
-    """
-
-    def __init__(
-        self,
-        model_name: str = "facebook/sam2.1-hiera-large",
-        min_mask_area: int = 100,
-        max_masks: int = 10,
-        device: Optional[str] = None,
-    ):
-        """Initialize SAM2.1 segmenter.
-
-        Args:
-            model_name: HuggingFace model name for SAM2.1
-            min_mask_area: Minimum mask pixel area threshold
-            max_masks: Maximum number of masks to keep
-            device: Device to load model on (auto-detect if None)
-        """
-        super().__init__()
-
-        self.model_name = model_name
-        self.min_mask_area = min_mask_area
-        self.max_masks = max_masks
-
-        # Auto detect device
-        if device is None:
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.device = torch.device(device)
-
-        # Load SAM model and processor
-        self.processor = AutoProcessor.from_pretrained(model_name)
-        self.model = AutoModelForMaskGeneration.from_pretrained(model_name).to(
-            self.device
-        )
-        self.model.eval()
-
-    def forward(self, image: Image.Image) -> list[Image.Image]:
-        """Extract object masks and crop object regions.
-
-        Args:
-            image: Input PIL Image
-
-        Returns:
-            List of cropped object images (one per valid mask)
-        """
-        # Run SAM inference
-        inputs = self.processor(image, return_tensors="pt").to(self.device)
-
-        with torch.no_grad():
-            outputs = self.model(**inputs)
-
-        # Post-process masks
-        masks = self.processor.post_process_masks(
-            outputs.pred_masks,
-            inputs["original_sizes"],
-            inputs["reshaped_input_sizes"],
-        )[0]
-
-        # Filter masks by area and confidence
-        valid_masks = self._filter_masks(masks)
-
-        if len(valid_masks) == 0:
-            return []
-
-        # Crop object regions from original image
-        cropped_objects = self._crop_objects(image, valid_masks)
-
-        return cropped_objects
-
-    def _filter_masks(self, masks: torch.Tensor) -> list[dict]:
-        """Filter masks by area and keep top-N.
-
-        Args:
-            masks: Predicted masks [N, H, W]
-
-        Returns:
-            List of mask dictionaries with 'mask' and 'area'
-        """
-        valid_masks = []
-
-        for mask in masks:
-            # Calculate mask area
-            area = mask.sum().item()
-
-            # Filter by minimum area
-            if area < self.min_mask_area:
-                continue
-
-            valid_masks.append({"mask": mask, "area": area})
-
-        # Sort by area (descending) and keep top-N
-        valid_masks = sorted(valid_masks, key=lambda x: x["area"], reverse=True)
-        valid_masks = valid_masks[: self.max_masks]
-
-        return valid_masks
-
-    def _crop_objects(
-        self, image: Image.Image, masks: list[dict]
-    ) -> list[Image.Image]:
-        """Crop object regions from image using masks.
-
-        Args:
-            image: Original PIL Image
-            masks: List of mask dictionaries
-
-        Returns:
-            List of cropped object images
-        """
-        # Convert PIL to numpy for processing
-        image_np = np.array(image)
-        h, w = image_np.shape[:2]
-
-        cropped_objects = []
-
-        for mask_info in masks:
-            mask = mask_info["mask"].cpu().numpy()
-
-            # Find bounding box from mask
-            rows = mask.any(axis=1)
-            cols = mask.any(axis=0)
-
-            if not rows.any() or not cols.any():
-                continue
-
-            y_min, y_max = rows.argmax(), h - rows[::-1].argmax() - 1
-            x_min, x_max = cols.argmax(), w - cols[::-1].argmax() - 1
-
-            # Add small padding
-            pad = 5
-            x_min = max(0, x_min - pad)
-            y_min = max(0, y_min - pad)
-            x_max = min(w, x_max + pad)
-            y_max = min(h, y_max + pad)
-
-            # Crop
-            cropped = image.crop((x_min, y_min, x_max, y_max))
-            cropped_objects.append(cropped)
-
-        return cropped_objects
-
-    @torch.no_grad()
-    def extract_masks(self, image: Image.Image) -> list[torch.Tensor]:
-        """Extract only masks without cropping (for debugging).
-
-        Args:
-            image: Input PIL Image
-
-        Returns:
-            List of binary masks [H, W]
-        """
-        inputs = self.processor(image, return_tensors="pt").to(self.device)
-        outputs = self.model(**inputs)
-
-        masks = self.processor.post_process_masks(
-            outputs.pred_masks,
-            inputs["original_sizes"],
-            inputs["reshaped_input_sizes"],
-        )[0]
-
-        valid_masks = self._filter_masks(masks)
-        return [m["mask"] for m in valid_masks]
--- a/mini-nav/tests/test_compressors.py
+++ b/mini-nav/tests/test_compressors.py
@@ -1,13 +1,13 @@
-"""Tests for compressor modules (SAM, DINO, HashCompressor, Pipeline)."""
+"""Tests for compressor modules (HashCompressor, Pipeline)."""

 import pytest
 import torch
 from compressors import (
    BinarySign,
-    DinoCompressor,
    HashCompressor,
+    HashPipeline,
    SAMHashPipeline,
-    SegmentCompressor,
+    VideoPositiveMask,
    bits_to_hash,
    create_pipeline_from_config,
    hamming_distance,
@@ -124,87 +124,105 @@ class TestHammingMetrics:
        assert sim.item() == 512  # Max similarity


-class TestSegmentCompressor:
-    """Test suite for SegmentCompressor."""
+class TestHashLoss:
+    """Test suite for HashLoss."""

-    @pytest.fixture
-    def mock_image(self):
-        """Create a mock PIL image."""
-        img = Image.new("RGB", (224, 224), color="red")
-        return img
+    def test_hash_loss_init(self):
+        """Verify HashLoss initializes with correct parameters."""
+        from compressors import HashLoss

-    def test_segment_compressor_init(self):
-        """Verify SegmentCompressor initializes with correct parameters."""
-        segmentor = SegmentCompressor(
-            model_name="facebook/sam2.1-hiera-large",
-            min_mask_area=100,
-            max_masks=10,
+        loss_fn = HashLoss(
+            contrastive_weight=1.0,
+            distill_weight=0.5,
+            quant_weight=0.01,
+            temperature=0.2,
        )

-        assert segmentor.model_name == "facebook/sam2.1-hiera-large"
-        assert segmentor.min_mask_area == 100
-        assert segmentor.max_masks == 10
+        assert loss_fn.contrastive_weight == 1.0
+        assert loss_fn.distill_weight == 0.5
+        assert loss_fn.quant_weight == 0.01
+        assert loss_fn.temperature == 0.2

-    def test_filter_masks(self):
-        """Verify mask filtering logic."""
-        # Create segmentor to get default filter params
-        segmentor = SegmentCompressor()
+    def test_hash_loss_forward(self):
+        """Verify HashLoss computes loss correctly."""
+        from compressors import HashLoss

-        # Create mock masks tensor with different areas
-        # Masks shape: [N, H, W]
-        masks = []
-        for area in [50, 200, 150, 300, 10]:
-            mask = torch.zeros(100, 100)
-            mask[:1, :area] = 1  # Create mask with specific area
-            masks.append(mask)
+        loss_fn = HashLoss()

-        masks_tensor = torch.stack(masks)  # [5, 100, 100]
-        valid = segmentor._filter_masks(masks_tensor)
+        batch_size = 4
+        hash_bits = 512
+        logits = torch.randn(batch_size, hash_bits)
+        hash_codes = torch.sign(logits)
+        teacher_embed = torch.randn(batch_size, 1024)
+        positive_mask = torch.eye(batch_size, dtype=torch.bool)

-        # Should filter out 50 and 10 (below min_mask_area=100)
-        # Then keep top 3 (max_masks=10)
-        assert len(valid) == 3
-        # Verify sorted by area (descending)
-        areas = [v["area"] for v in valid]
-        assert areas == sorted(areas, reverse=True)
+        total_loss, components = loss_fn(
+            logits=logits,
+            hash_codes=hash_codes,
+            teacher_embed=teacher_embed,
+            positive_mask=positive_mask,
+        )
+
+        assert "contrastive" in components
+        assert "distill" in components
+        assert "quantization" in components
+        assert "total" in components


-class TestDinoCompressor:
-    """Test suite for DinoCompressor."""
+class TestVideoPositiveMask:
+    """Test suite for VideoPositiveMask."""

-    def test_dino_compressor_init(self):
-        """Verify DinoCompressor initializes correctly."""
-        dino = DinoCompressor()
+    def test_from_frame_indices(self):
+        """Verify positive mask generation from frame indices."""
+        mask_gen = VideoPositiveMask(temporal_window=2)

-        assert dino.model_name == "facebook/dinov2-large"
+        frame_indices = torch.tensor([0, 1, 3, 5])

-    def test_dino_compressor_with_compressor(self):
-        """Verify DinoCompressor with HashCompressor."""
-        hash_compressor = HashCompressor(input_dim=1024, hash_bits=512)
-        dino = DinoCompressor(compressor=hash_compressor)
+        mask = mask_gen.from_frame_indices(frame_indices)

-        assert dino.compressor is hash_compressor
+        assert mask.shape == (4, 4)
+        # Frame 0 and 1 should be positive (distance 1 <= 2)
+        assert mask[0, 1] == True
+        # Frame 0 and 3 should be negative (distance 3 > 2)
+        assert mask[0, 3] == False
+
+    def test_from_video_ids(self):
+        """Verify positive mask generation from video IDs and frame indices."""
+        mask_gen = VideoPositiveMask(temporal_window=2)
+
+        video_ids = torch.tensor([0, 0, 1, 1])
+        frame_indices = torch.tensor([0, 1, 0, 1])
+
+        mask = mask_gen.from_video_ids(video_ids, frame_indices)
+
+        assert mask.shape == (4, 4)
+        # Same video and temporally close
+        assert mask[0, 1] == True  # video 0, frames 0,1
+        # Different video
+        assert mask[0, 2] == False  # video 0 vs 1


-class TestSAMHashPipeline:
-    """Test suite for SAMHashPipeline."""
+class TestHashPipeline:
+    """Test suite for HashPipeline."""

    def test_pipeline_init(self):
        """Verify pipeline initializes all components."""
-        pipeline = SAMHashPipeline(
-            sam_model="facebook/sam2.1-hiera-large",
+        pipeline = HashPipeline(
            dino_model="facebook/dinov2-large",
            hash_bits=512,
        )

-        assert isinstance(pipeline.segmentor, SegmentCompressor)
-        assert isinstance(pipeline.dino, DinoCompressor)
-        assert isinstance(pipeline.hash_compressor, HashCompressor)
+        assert pipeline.dino_model == "facebook/dinov2-large"
+        assert pipeline.dino_dim == 1024

    def test_pipeline_hash_bits(self):
        """Verify pipeline uses correct hash bits."""
-        pipeline = SAMHashPipeline(hash_bits=256)
-        assert pipeline.hash_compressor.hash_bits == 256
+        pipeline = HashPipeline(hash_bits=256)
+        assert pipeline.hash_bits == 256
+
+    def test_pipeline_alias(self):
+        """Verify SAMHashPipeline is alias for HashPipeline."""
+        assert SAMHashPipeline is HashPipeline


 class TestConfigIntegration:
@@ -216,25 +234,21 @@ class TestConfigIntegration:

        pipeline = create_pipeline_from_config(config)

-        assert isinstance(pipeline, SAMHashPipeline)
-        assert pipeline.hash_compressor.hash_bits == config.model.compression_dim
+        assert isinstance(pipeline, HashPipeline)
+        assert pipeline.hash_bits == config.model.compression_dim

-    def test_config_sam_settings(self):
-        """Verify config contains SAM settings."""
+    def test_config_settings(self):
+        """Verify config contains required settings."""
        config = cfg_manager.load()

-        assert hasattr(config.model, "sam_model")
-        assert hasattr(config.model, "sam_min_mask_area")
-        assert hasattr(config.model, "sam_max_masks")
-        assert config.model.sam_model == "facebook/sam2.1-hiera-large"
-        assert config.model.sam_min_mask_area == 100
-        assert config.model.sam_max_masks == 10
+        assert hasattr(config.model, "dino_model")
+        assert hasattr(config.model, "compression_dim")


+@pytest.mark.slow
 class TestPipelineIntegration:
    """Integration tests for full pipeline (slow, requires model downloads)."""

-    @pytest.mark.slow
    def test_pipeline_end_to_end(self):
        """Test full pipeline with actual models (slow test)."""
        # Skip if no GPU
@@ -245,54 +259,32 @@ class TestPipelineIntegration:
        image = Image.new("RGB", (640, 480), color=(128, 128, 128))

        # Initialize pipeline (will download models on first run)
-        pipeline = SAMHashPipeline(
-            sam_model="facebook/sam2.1-hiera-large",
+        pipeline = HashPipeline(
            dino_model="facebook/dinov2-large",
            hash_bits=512,
-            sam_min_mask_area=100,
-            sam_max_masks=5,
        )

        # Run pipeline
-        hash_codes = pipeline(image)
+        hash_bits = pipeline(image)

        # Verify output shape
-        assert hash_codes.dim() == 2
-        assert hash_codes.shape[1] == 512
-        assert torch.all((hash_codes == 0) | (hash_codes == 1))
+        assert hash_bits.dim() == 2
+        assert hash_bits.shape[1] == 512
+        assert torch.all((hash_bits == 0) | (hash_bits == 1))

-    @pytest.mark.slow
-    def test_extract_features_without_hash(self):
-        """Test feature extraction without hash compression."""
+    def test_extract_features(self):
+        """Test feature extraction."""
        if not torch.cuda.is_available():
            pytest.skip("Requires CUDA")

        image = Image.new("RGB", (640, 480), color=(128, 128, 128))

-        pipeline = SAMHashPipeline(
-            sam_model="facebook/sam2.1-hiera-large",
+        pipeline = HashPipeline(
            dino_model="facebook/dinov2-large",
        )

-        features = pipeline.extract_features(image, use_hash=False)
+        features = pipeline.extract_features(image)

        # Should return DINO features (1024 for large)
        assert features.dim() == 2
        assert features.shape[1] == 1024
-
-    @pytest.mark.slow
-    def test_extract_masks_only(self):
-        """Test mask extraction only."""
-        if not torch.cuda.is_available():
-            pytest.skip("Requires CUDA")
-
-        image = Image.new("RGB", (640, 480), color=(128, 128, 128))
-
-        pipeline = SAMHashPipeline(
-            sam_model="facebook/sam2.1-hiera-large",
-        )
-
-        masks = pipeline.extract_masks(image)
-
-        # Should return a list of masks
-        assert isinstance(masks, list)