-
Notifications
You must be signed in to change notification settings - Fork 64
Open
Description
Looking at https://github.com/zurawiki/tiktoken-rs/blob/main/tiktoken-rs/src/patched_tiktoken.rs#L64C1-L85C6
Why does the API require passing the tokens by value, only to immediately take a reference to them?
/// Decode a vector of tokens into a valid UTF-8 `String`.
///
/// The tokens are first turned into raw bytes via `decode_bytes`, and those
/// bytes are then validated as UTF-8.
///
/// If unicode validation is not wanted, see _decode_native.
///
/// # Errors
///
/// Returns an error if `decode_bytes` fails (propagated unchanged), or if the
/// decoded bytes are not valid UTF-8.
pub fn decode(&self, tokens: Vec<Rank>) -> Result<String> {
    let raw_bytes = self.decode_bytes(&tokens)?;
    String::from_utf8(raw_bytes)
        .map_err(|e| anyhow!("Unable to decode into a valid UTF-8 string: {}", e))
}
pub fn _decode_native_and_split(
&self,
tokens: Vec<Rank>,
) -> impl Iterator<Item = Vec<u8>> + '_ {
tokens.into_iter().map(|token| {
let token_bytes = self
.decoder
.get(&token)
.unwrap_or_else(|| &self.special_tokens_decoder[&token]);
token_bytes.clone()
})
}Metadata
Metadata
Assignees
Labels
No labels