mirror of
https://github.com/SikongJueluo/Mini-Nav.git
synced 2026-03-12 12:25:32 +08:00
158 lines
5.1 KiB
Python
158 lines
5.1 KiB
Python
"""Local dataset loader for benchmark evaluation."""
|
|
|
|
from pathlib import Path
|
|
from typing import Any, Optional
|
|
|
|
from ..base import BaseDataset
|
|
|
|
|
|
class LocalDataset(BaseDataset):
    """Dataset loader for local datasets.

    Two on-disk layouts are supported, selected from ``local_path``:

    * a path whose suffix is ``.csv`` is read with pandas and split 80/20
      by row order (see ``_load_csv_dataset``);
    * any other path is treated as a directory containing ``train/`` and
      ``test/`` sub-directories with one folder per class
      (see ``_load_directory_dataset``).

    Splits are loaded lazily on the first ``get_train_split`` /
    ``get_test_split`` call and cached on the instance.
    """

    # Lower-case file suffixes the directory loader accepts as images.
    _IMAGE_SUFFIXES = frozenset({".jpg", ".jpeg", ".png", ".bmp"})

    def __init__(
        self,
        local_path: str,
        img_column: str = "image_path",
        label_column: str = "label",
    ):
        """Initialize local dataset loader.

        Nothing is read from disk here; loading is deferred until a split
        is requested.

        Args:
            local_path: Path to local dataset directory or CSV file.
            img_column: Name of the image path column (CSV layout only).
            label_column: Name of the label column (CSV layout only).
        """
        self.local_path = Path(local_path)
        self.img_column = img_column
        self.label_column = label_column
        self._train_dataset: Optional[Any] = None
        self._test_dataset: Optional[Any] = None

    def _load_csv_dataset(self) -> tuple[Any, Any]:
        """Load dataset from CSV file.

        Expected CSV format:
            label,image_path,x1,y1,x2,y2
            "class_name","path/to/image.jpg",100,200,300,400

        The first 80% of the rows (in file order, no shuffling) become the
        train split and the remaining rows the test split. Items are
        returned as ``{"img": <value of img_column>, "label": <value of
        label_column>}``; the image column value is passed through as-is
        and is not opened.

        Returns:
            Tuple of (train_dataset, test_dataset).
        """
        import pandas as pd
        from torch.utils.data import Dataset as TorchDataset

        df = pd.read_csv(self.local_path)

        class CSVDataset(TorchDataset):
            """Row-indexed view over a DataFrame slice."""

            def __init__(self, dataframe: pd.DataFrame, img_col: str, label_col: str):
                # Re-index from 0 so positional access matches __len__.
                self.df = dataframe.reset_index(drop=True)
                self.img_col = img_col
                self.label_col = label_col

            def __len__(self) -> int:
                return len(self.df)

            def __getitem__(self, idx: int) -> dict[str, Any]:
                row = self.df.iloc[idx]
                return {
                    "img": row[self.img_col],
                    "label": row[self.label_col],
                }

        # Split into train/test (80/20) by row position.
        split_idx = int(len(df) * 0.8)
        train_df = df.iloc[:split_idx]
        test_df = df.iloc[split_idx:]

        self._train_dataset = CSVDataset(train_df, self.img_column, self.label_column)
        self._test_dataset = CSVDataset(test_df, self.img_column, self.label_column)

        return self._train_dataset, self._test_dataset

    def _load_directory_dataset(self) -> tuple[Any, Any]:
        """Load dataset from directory structure.

        Expected structure:
            local_path/
                train/
                    class_name_1/
                        image1.jpg
                        image2.jpg
                    class_name_2/
                        image1.jpg
                test/
                    class_name_1/
                        image1.jpg

        Class folders are mapped to integer labels in sorted-name order,
        independently for each split. A split whose directory does not
        exist is left unchanged (``None`` if it was never loaded).

        Returns:
            Tuple of (train_dataset, test_dataset).
        """
        from torch.utils.data import Dataset as TorchDataset
        from PIL import Image

        # Captured by the nested class below; inside it ``self`` refers to
        # the DirectoryDataset instance, not to this loader.
        image_suffixes = self._IMAGE_SUFFIXES

        class DirectoryDataset(TorchDataset):
            """Image-folder dataset: one sub-directory per class."""

            def __init__(self, root_dir: Path, transform=None):
                self.root_dir = root_dir
                self.transform = transform

                # Build label map from sorted class-folder names.
                classes = sorted(d.name for d in root_dir.iterdir() if d.is_dir())
                self.label_map = {cls: idx for idx, cls in enumerate(classes)}

                # Build sample list. Both levels are walked in sorted order
                # because iterdir() yields entries in arbitrary order, which
                # would make sample indices differ between machines/runs.
                self.samples = []
                for cls_name in classes:
                    label = self.label_map[cls_name]
                    for img_path in sorted((root_dir / cls_name).iterdir()):
                        # is_file() skips directories that merely carry an
                        # image-like suffix and could not be opened later.
                        if img_path.suffix.lower() in image_suffixes and img_path.is_file():
                            self.samples.append((img_path, label))

            def __len__(self) -> int:
                return len(self.samples)

            def __getitem__(self, idx: int) -> dict[str, Any]:
                img_path, label = self.samples[idx]
                # Image.open is lazy and keeps the file open; convert()
                # forces the read and returns a detached copy, so the
                # source handle can be closed deterministically.
                with Image.open(img_path) as source:
                    image = source.convert("RGB")
                if self.transform is not None:
                    image = self.transform(image)
                return {"img": image, "label": label}

        train_dir = self.local_path / "train"
        test_dir = self.local_path / "test"

        if train_dir.exists():
            self._train_dataset = DirectoryDataset(train_dir)
        if test_dir.exists():
            self._test_dataset = DirectoryDataset(test_dir)

        return self._train_dataset, self._test_dataset

    def _load(self) -> None:
        """Populate the cached splits using the loader matching ``local_path``."""
        if self.local_path.suffix.lower() == ".csv":
            self._load_csv_dataset()
        else:
            self._load_directory_dataset()

    def get_train_split(self) -> Any:
        """Get training split of the dataset.

        Triggers loading if the training split has not been cached yet.

        Returns:
            Training dataset, or ``None`` if the directory layout has no
            ``train/`` sub-directory.
        """
        if self._train_dataset is None:
            self._load()
        return self._train_dataset

    def get_test_split(self) -> Any:
        """Get test/evaluation split of the dataset.

        Triggers loading if the test split has not been cached yet.

        Returns:
            Test dataset, or ``None`` if the directory layout has no
            ``test/`` sub-directory.
        """
        if self._test_dataset is None:
            self._load()
        return self._test_dataset
|