NOTE: reconstructed from a copy of this patch that had been collapsed onto a
single line. Line breaks follow the hunk headers; the indentation of the Python
lines is inferred and should be confirmed against the target files (or apply
with `git apply --ignore-whitespace`). The last context line of the train.py
hunk is a blank line.

diff --git a/khaosz/core/tokenizer.py b/khaosz/core/tokenizer.py
index 1f0f1ed..65e06b4 100644
--- a/khaosz/core/tokenizer.py
+++ b/khaosz/core/tokenizer.py
@@ -109,3 +109,11 @@ class BpeTokenizer:
     @property
     def pad_id(self) -> int:
         return self._tokenizer.token_to_id("")
+
+    @property
+    def user_id(self) -> int:
+        return self._tokenizer.token_to_id("<|user|>")
+
+    @property
+    def system_id(self) -> int:
+        return self._tokenizer.token_to_id("<|system|>")
\ No newline at end of file
diff --git a/train.py b/train.py
index 64805d5..bf3dc16 100644
--- a/train.py
+++ b/train.py
@@ -51,7 +51,7 @@ def train(
         "multi_turn": multi_turn,
         "bos_token_id": parameter.tokenizer.bos_id,
         "eos_token_id": parameter.tokenizer.eos_id,
-        "user_token_id":parameter.tokenizer.encode("<|user|>")[0],
+        "user_token_id":parameter.tokenizer.user_id,
         "dpo_beta": dpo_beta
     }
 