feat(feature-compressor): add DINOv2 feature extraction and compression pipeline

This commit is contained in:
2026-01-31 10:33:37 +08:00
parent f9a359fc28
commit 1454647aa6
22 changed files with 1486 additions and 16 deletions

View File

@@ -0,0 +1,63 @@
"""Basic usage example for DINOv2 Feature Compressor."""
import sys
from pathlib import Path
# Add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
import requests
from PIL import Image
import io
from dino_feature_compressor import DINOv2FeatureExtractor, FeatureVisualizer
def main():
    """Run the basic DINOv2 feature-extraction example end to end.

    Downloads one sample image, stores it in the system temporary
    directory, passes it to ``DINOv2FeatureExtractor.process_image``,
    prints the shapes and metadata found in the returned dict, and writes
    a histogram of the compressed features as HTML under ``outputs/``.

    Raises:
        requests.RequestException: If the download times out or the
            server answers with an HTTP error status.
    """
    import tempfile

    # Initialize extractor
    print("Initializing DINOv2FeatureExtractor...")
    extractor = DINOv2FeatureExtractor()

    # Download and save test image
    print("Downloading test image...")
    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
    response.raise_for_status()
    img = Image.open(io.BytesIO(response.content))
    # JPEG cannot store alpha/palette modes; normalise before saving.
    if img.mode != "RGB":
        img = img.convert("RGB")
    # Use the platform temp directory rather than a hard-coded /tmp.
    test_image_path = str(Path(tempfile.gettempdir()) / "test_image.jpg")
    img.save(test_image_path)
    print(f"Image saved to {test_image_path}")

    # Extract features
    print("Extracting features...")
    result = extractor.process_image(test_image_path)
    metadata = result["metadata"]

    print("\n=== Feature Extraction Results ===")
    print(f"Original features shape: {result['original_features'].shape}")
    print(f"Compressed features shape: {result['compressed_features'].shape}")
    print(f"Processing time: {metadata['processing_time']:.3f}s")
    print(f"Compression ratio: {metadata['compression_ratio']:.2f}x")
    print(f"Feature norm: {metadata['feature_norm']:.4f}")
    print(f"Device: {metadata['device']}")

    # Visualize
    print("\nGenerating visualization...")
    viz = FeatureVisualizer()
    fig = viz.plot_histogram(
        result["compressed_features"], title="Compressed Features Distribution"
    )

    # The path is passed without a suffix; the message below assumes
    # viz.save appends ".html" for the "html" format -- TODO confirm.
    output_path = (
        Path(__file__).parent.parent.parent / "outputs" / "basic_usage_histogram"
    )
    output_path.parent.mkdir(parents=True, exist_ok=True)
    viz.save(fig, str(output_path), formats=["html"])
    print(f"Visualization saved to {output_path}.html")
    print("\nDone!")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,49 @@
"""Batch processing example for DINOv2 Feature Compressor."""
import sys
from pathlib import Path
# Add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from dino_feature_compressor import DINOv2FeatureExtractor
def main():
    """Run the batch-processing example on a few random test images.

    Creates a small set of random-noise JPEG images in a ``test_images``
    folder inside the system temporary directory, passes that folder to
    ``DINOv2FeatureExtractor.process_batch`` with ``save_features=True``,
    and prints the path, compressed shape and feature norm reported for
    each result.
    """
    import tempfile

    import numpy as np
    from PIL import Image

    # Single source of truth for the loop and the status message.
    num_images = 3

    # Initialize extractor
    print("Initializing DINOv2FeatureExtractor...")
    extractor = DINOv2FeatureExtractor()

    # Create a test directory with sample images
    # In practice, use your own directory
    image_dir = Path(tempfile.gettempdir()) / "test_images"
    image_dir.mkdir(parents=True, exist_ok=True)

    print("Creating test images...")
    for i in range(num_images):
        # randint's upper bound is exclusive: 256 is required to cover the
        # full uint8 range 0..255.
        img_array = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
        Image.fromarray(img_array).save(image_dir / f"test_{i}.jpg")
    print(f"Created {num_images} test images in {image_dir}")

    # Process batch
    print("\nProcessing images in batch...")
    results = extractor.process_batch(
        str(image_dir), batch_size=2, save_features=True
    )

    print("\n=== Batch Processing Results ===")
    print(f"Processed {len(results)} images")
    for i, result in enumerate(results, start=1):
        print(f"\nImage {i}: {result['metadata']['image_path']}")
        print(f" Compressed shape: {result['compressed_features'].shape}")
        print(f" Feature norm: {result['metadata']['feature_norm']:.4f}")
    print("\nDone! Features saved to outputs/ directory.")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,61 @@
"""Visualization example for DINOv2 Feature Compressor."""
import sys
from pathlib import Path
# Add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
import numpy as np
import torch
from dino_feature_compressor import FeatureVisualizer
def main():
    """Run the visualization example on two synthetic feature clusters.

    Builds ``n_samples`` random feature vectors split into two Gaussian
    clusters shifted by +2 and -2, then uses ``FeatureVisualizer`` to
    create a histogram, a 2D PCA projection and a comparison plot, saving
    each one in the "html" and "json" formats under ``outputs/``.
    """
    # Generate synthetic features for demonstration
    print("Generating synthetic features...")
    n_samples = 100
    n_features = 256
    # Derive both cluster sizes from n_samples so changing it actually
    # changes the data (the sizes were previously hard-coded to 50).
    n_first = n_samples // 2
    n_second = n_samples - n_first

    # Create two clusters
    cluster1 = np.random.randn(n_first, n_features) + 2
    cluster2 = np.random.randn(n_second, n_features) - 2
    features = np.vstack([cluster1, cluster2])
    labels = ["Cluster A"] * n_first + ["Cluster B"] * n_second
    features_tensor = torch.tensor(features, dtype=torch.float32)

    # Initialize visualizer
    print("Initializing FeatureVisualizer...")
    viz = FeatureVisualizer()
    output_dir = Path(__file__).parent.parent.parent / "outputs"
    output_dir.mkdir(parents=True, exist_ok=True)
    formats = ["html", "json"]

    # Create histogram
    print("Creating histogram...")
    fig_hist = viz.plot_histogram(features_tensor, title="Feature Distribution")
    viz.save(fig_hist, str(output_dir / "feature_histogram"), formats=formats)
    print(f"Saved histogram to {output_dir / 'feature_histogram.html'}")

    # Create PCA 2D projection
    print("Creating PCA 2D projection...")
    fig_pca = viz.plot_pca_2d(features_tensor, labels=labels)
    viz.save(fig_pca, str(output_dir / "feature_pca_2d"), formats=formats)
    print(f"Saved PCA to {output_dir / 'feature_pca_2d.html'}")

    # Create comparison plot
    print("Creating comparison plot...")
    # Use float32 like features_tensor; torch.tensor on a NumPy float64
    # array would otherwise produce float64 tensors for this plot only.
    features_list = [
        torch.tensor(cluster, dtype=torch.float32)
        for cluster in (cluster1, cluster2)
    ]
    names = ["Cluster A", "Cluster B"]
    fig_comp = viz.plot_comparison(features_list, names)
    viz.save(fig_comp, str(output_dir / "feature_comparison"), formats=formats)
    print(f"Saved comparison to {output_dir / 'feature_comparison.html'}")
    print("\nDone! All visualizations saved to outputs/ directory.")


if __name__ == "__main__":
    main()