feat(khaosz/core/tokenizer): 添加 user_id 和 system_id 属性

This commit is contained in:
ViperEkura 2025-09-29 13:47:37 +08:00
parent 89211c16f6
commit bdda1cc35a
2 changed files with 9 additions and 1 deletions

View File

@ -109,3 +109,11 @@ class BpeTokenizer:
@property
def pad_id(self) -> int:
    """Id of the ``<pad>`` token.

    Delegates to ``self._tokenizer.token_to_id`` and returns whatever
    that call yields for the literal ``"<pad>"``.
    """
    # NOTE(review): if token_to_id can return None for a token missing
    # from the vocabulary, this property will too — confirm with caller.
    pad_token = "<pad>"
    token_id = self._tokenizer.token_to_id(pad_token)
    return token_id
@property
def user_id(self) -> int:
    """Id of the ``<|user|>`` token.

    Delegates to ``self._tokenizer.token_to_id`` and returns whatever
    that call yields for the literal ``"<|user|>"``.
    """
    # NOTE(review): if token_to_id can return None for a token missing
    # from the vocabulary, this property will too — confirm with caller.
    user_token = "<|user|>"
    token_id = self._tokenizer.token_to_id(user_token)
    return token_id
@property
def system_id(self) -> int:
    """Id of the ``<|system|>`` token.

    Delegates to ``self._tokenizer.token_to_id`` and returns whatever
    that call yields for the literal ``"<|system|>"``.
    """
    # NOTE(review): if token_to_id can return None for a token missing
    # from the vocabulary, this property will too — confirm with caller.
    system_token = "<|system|>"
    token_id = self._tokenizer.token_to_id(system_token)
    return token_id

View File

@ -51,7 +51,7 @@ def train(
"multi_turn": multi_turn,
"bos_token_id": parameter.tokenizer.bos_id,
"eos_token_id": parameter.tokenizer.eos_id,
"user_token_id":parameter.tokenizer.encode("<|user|>")[0],
"user_token_id":parameter.tokenizer.user_id,
"dpo_beta": dpo_beta
}