From e86328b753e8501f320c17fe27ee71593a33ee45 Mon Sep 17 00:00:00 2001 From: ViperEkura <3081035982@qq.com> Date: Fri, 31 Oct 2025 19:19:38 +0800 Subject: [PATCH] =?UTF-8?q?fix(tokenizer):=20=E4=BF=AE=E5=A4=8Dstop=5Fids?= =?UTF-8?q?=E5=B1=9E=E6=80=A7=E8=BF=94=E5=9B=9E=E9=94=99=E8=AF=AF=E7=9A=84?= =?UTF-8?q?token=20ID=E5=88=97=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- khaosz/data/tokenizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/khaosz/data/tokenizer.py b/khaosz/data/tokenizer.py index 935027d..214c293 100644 --- a/khaosz/data/tokenizer.py +++ b/khaosz/data/tokenizer.py @@ -93,7 +93,8 @@ class BpeTokenizer: @property def stop_ids(self) -> List[int]: - stop_ids = self._control_tokens + self._special_tokens + stop_token = self._control_tokens + self._special_tokens + stop_ids = [self._tokenizer.token_to_id(token) for token in stop_token] return stop_ids @property