From 0cf80a5dd2085e3d38d4857754b816ae1e812a5c Mon Sep 17 00:00:00 2001
From: leejet
Date: Thu, 19 Feb 2026 20:48:23 +0800
Subject: [PATCH] fix: safely handle whitespace and consecutive newlines

---
 src/tokenize_util.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/tokenize_util.cpp b/src/tokenize_util.cpp
index bc0ff1d5a..22cf8ae2e 100644
--- a/src/tokenize_util.cpp
+++ b/src/tokenize_util.cpp
@@ -919,15 +919,21 @@ std::vector token_split(const std::string& text) {
 
         // `\s*[\r\n]+|\s+(?!\S)|\s+`
         if (is_space(cp)) {
-            std::string token = codepoint_to_utf8(cp);
-            ++i;
+            std::string token;
+            bool saw_new_line = false;
 
             while (i < cps.size() && is_space(cps[i])) {
                 token += codepoint_to_utf8(cps[i]);
-                ++i;
+
                 if (cps[i] == U'\r' || cps[i] == U'\n') {
-                    break;
+                    saw_new_line = true;
+                } else {
+                    if (saw_new_line) {
+                        break;
+                    }
                 }
+
+                ++i;
             }
 
             tokens.push_back(token);