feat(feature-compressor): add DINOv2 feature extraction and compression pipeline

2026-05-10 17:45:31 +08:00 · 2026-01-31 10:33:37 +08:00
parent f9a359fc28
commit 1454647aa6
22 changed files with 1486 additions and 16 deletions
--- a/mini-nav/feature_compressor/examples/basic_usage.py
+++ b/mini-nav/feature_compressor/examples/basic_usage.py
@@ -0,0 +1,63 @@
+"""Basic usage example for DINOv2 Feature Compressor."""
+
+import sys
+from pathlib import Path
+
+# Add the directory three levels above this file to sys.path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+import requests
+from PIL import Image
+import io
+
+from dino_feature_compressor import DINOv2FeatureExtractor, FeatureVisualizer
+
+
+def main():
+    """Run the single-image demo: download, extract, report, and plot.
+
+    Downloads a sample image, saves it to /tmp/test_image.jpg, passes that
+    path to ``DINOv2FeatureExtractor.process_image``, prints the shapes and
+    metadata found in the returned dict, and saves a histogram of the
+    compressed features as HTML under the ``outputs`` directory.
+
+    Raises:
+        requests.RequestException: If the download times out, fails, or the
+            server responds with an error status.
+    """
+    # Initialize extractor
+    print("Initializing DINOv2FeatureExtractor...")
+    extractor = DINOv2FeatureExtractor()
+
+    # Download and save test image
+    print("Downloading test image...")
+    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+    # A timeout keeps the example from hanging forever on a stalled connection.
+    response = requests.get(url, timeout=30)
+    # Fail here on 4xx/5xx instead of handing an error page to PIL.
+    response.raise_for_status()
+    img = Image.open(io.BytesIO(response.content))
+
+    test_image_path = "/tmp/test_image.jpg"
+    img.save(test_image_path)
+    print(f"Image saved to {test_image_path}")
+
+    # Extract features
+    print("Extracting features...")
+    result = extractor.process_image(test_image_path)
+    metadata = result["metadata"]
+
+    print("\n=== Feature Extraction Results ===")
+    print(f"Original features shape: {result['original_features'].shape}")
+    print(f"Compressed features shape: {result['compressed_features'].shape}")
+    print(f"Processing time: {metadata['processing_time']:.3f}s")
+    print(f"Compression ratio: {metadata['compression_ratio']:.2f}x")
+    print(f"Feature norm: {metadata['feature_norm']:.4f}")
+    print(f"Device: {metadata['device']}")
+
+    # Visualize
+    print("\nGenerating visualization...")
+    viz = FeatureVisualizer()
+
+    fig = viz.plot_histogram(
+        result["compressed_features"], title="Compressed Features Distribution"
+    )
+
+    # The path is passed without an extension; the message below appends ".html".
+    output_path = (
+        Path(__file__).parent.parent.parent / "outputs" / "basic_usage_histogram"
+    )
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    viz.save(fig, str(output_path), formats=["html"])
+    print(f"Visualization saved to {output_path}.html")
+
+    print("\nDone!")
+
+
+if __name__ == "__main__":
+    main()
--- a/mini-nav/feature_compressor/examples/batch_processing.py
+++ b/mini-nav/feature_compressor/examples/batch_processing.py
@@ -0,0 +1,49 @@
+"""Batch processing example for DINOv2 Feature Compressor."""
+
+import sys
+from pathlib import Path
+
+# Add the directory three levels above this file to sys.path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from dino_feature_compressor import DINOv2FeatureExtractor
+
+
+def main():
+    """Run the batch demo: write random test images and process the directory.
+
+    Creates /tmp/test_images, fills it with random 224x224 RGB JPEGs, passes
+    the directory to ``DINOv2FeatureExtractor.process_batch`` with
+    ``batch_size=2`` and ``save_features=True``, and prints the compressed
+    shape and feature norm reported for each result.
+    """
+    # Initialize extractor
+    print("Initializing DINOv2FeatureExtractor...")
+    extractor = DINOv2FeatureExtractor()
+
+    # Create a test directory with sample images
+    # In practice, use your own directory
+    image_dir = "/tmp/test_images"
+    Path(image_dir).mkdir(parents=True, exist_ok=True)
+
+    # Create the test images
+    print("Creating test images...")
+    import numpy as np
+    from PIL import Image
+
+    num_images = 3
+    for i in range(num_images):
+        # randint's upper bound is exclusive, so 256 is needed to cover 0..255.
+        img_array = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
+        img = Image.fromarray(img_array)
+        img.save(Path(image_dir) / f"test_{i}.jpg")
+    print(f"Created {num_images} test images in {image_dir}")
+
+    # Process batch
+    print("\nProcessing images in batch...")
+    results = extractor.process_batch(image_dir, batch_size=2, save_features=True)
+
+    print("\n=== Batch Processing Results ===")
+    print(f"Processed {len(results)} images")
+
+    for i, result in enumerate(results, start=1):
+        print(f"\nImage {i}: {result['metadata']['image_path']}")
+        print(f"  Compressed shape: {result['compressed_features'].shape}")
+        print(f"  Feature norm: {result['metadata']['feature_norm']:.4f}")
+
+    print("\nDone! Features saved to outputs/ directory.")
+
+
+if __name__ == "__main__":
+    main()
--- a/mini-nav/feature_compressor/examples/visualization.py
+++ b/mini-nav/feature_compressor/examples/visualization.py
@@ -0,0 +1,61 @@
+"""Visualization example for DINOv2 Feature Compressor."""
+
+import sys
+from pathlib import Path
+
+# Add the directory three levels above this file to sys.path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+import numpy as np
+import torch
+
+from dino_feature_compressor import FeatureVisualizer
+
+
+def main():
+    """Run the visualization demo on two synthetic feature clusters.
+
+    Draws two Gaussian clusters shifted by +2 and -2, converts them to a
+    float32 tensor, and saves a histogram, a PCA 2D projection, and a
+    comparison plot as HTML and JSON under the ``outputs`` directory.
+    """
+    # Generate synthetic features for demonstration
+    print("Generating synthetic features...")
+    n_samples = 100
+    n_features = 256
+    # Split the samples between the two clusters; the second takes any remainder.
+    n_first = n_samples // 2
+    n_second = n_samples - n_first
+
+    # Create two clusters
+    cluster1 = np.random.randn(n_first, n_features) + 2
+    cluster2 = np.random.randn(n_second, n_features) - 2
+    features = np.vstack([cluster1, cluster2])
+
+    labels = ["Cluster A"] * n_first + ["Cluster B"] * n_second
+
+    features_tensor = torch.tensor(features, dtype=torch.float32)
+
+    # Initialize visualizer
+    print("Initializing FeatureVisualizer...")
+    viz = FeatureVisualizer()
+
+    output_dir = Path(__file__).parent.parent.parent / "outputs"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Create histogram
+    print("Creating histogram...")
+    fig_hist = viz.plot_histogram(features_tensor, title="Feature Distribution")
+    viz.save(fig_hist, str(output_dir / "feature_histogram"), formats=["html", "json"])
+    print(f"Saved histogram to {output_dir / 'feature_histogram.html'}")
+
+    # Create PCA 2D projection
+    print("Creating PCA 2D projection...")
+    fig_pca = viz.plot_pca_2d(features_tensor, labels=labels)
+    viz.save(fig_pca, str(output_dir / "feature_pca_2d"), formats=["html", "json"])
+    print(f"Saved PCA to {output_dir / 'feature_pca_2d.html'}")
+
+    # Create comparison plot
+    print("Creating comparison plot...")
+    # Slice the float32 tensor so every plot gets the same dtype;
+    # torch.tensor() on the raw NumPy clusters would produce float64.
+    features_list = [features_tensor[:n_first], features_tensor[n_first:]]
+    names = ["Cluster A", "Cluster B"]
+    fig_comp = viz.plot_comparison(features_list, names)
+    viz.save(fig_comp, str(output_dir / "feature_comparison"), formats=["html", "json"])
+    print(f"Saved comparison to {output_dir / 'feature_comparison.html'}")
+
+    print("\nDone! All visualizations saved to outputs/ directory.")
+
+
+if __name__ == "__main__":
+    main()