AstrAI/astrai/data/__init__.py

31 lines
679 B
Python

from astrai.data.dataset import (
BaseDataset,
DatasetFactory,
DatasetLoader,
DPODataset,
GRPODataset,
MultiSegmentFetcher,
SEQDataset,
SFTDataset,
)
from astrai.data.sampler import ResumableDistributedSampler
from astrai.data.tokenizer import BpeTokenizer
__all__ = [
# Base classes
"BaseDataset",
# Dataset implementations
"SEQDataset",
"SFTDataset",
"DPODataset",
"GRPODataset",
# Fetchers
"MultiSegmentFetcher",
# Factory (DatasetLoader is alias for backward compatibility)
"DatasetLoader",
"DatasetFactory",
# Tokenizer and sampler
"BpeTokenizer",
"ResumableDistributedSampler",
]