# AstrAI/khaosz/data/__init__.py
#
# 30 lines
# 587 B
# Python

# Package-level re-exports: every name imported below is also listed in
# __all__ further down.
from khaosz.data.data_util import (
    BaseDataset,
    SeqDataset,
    DpoDataset,
    SftDataset,
    PpoDataset,
    MutiSegmentFetcher,
    ResumeableRandomSampler,
    DatasetLoader,
    load_pkl_files,
    build_attention_mask,
    build_loss_mask,
)
from khaosz.data.tokenizer import BpeTokenizer

# Names exposed by `from khaosz.data import *`. Keep this list in sync with
# the imports above.
__all__ = [
    # Imported from khaosz.data.data_util
    "BaseDataset",
    "SeqDataset",
    "DpoDataset",
    "SftDataset",
    "PpoDataset",
    "MutiSegmentFetcher",
    "ResumeableRandomSampler",
    "DatasetLoader",
    "load_pkl_files",
    "build_attention_mask",
    "build_loss_mask",
    # Imported from khaosz.data.tokenizer
    "BpeTokenizer",
]