Skip to content

Commit

Permalink
Merge pull request #181 from bespokelabsai/ryanm/special-token-error
Browse files (browse the repository at this point in the history)
  • Loading branch information
vutrung96 authored Nov 30, 2024
2 parents 6252238 + 3a49331 commit 87cfc3d
Showing 1 changed file with 7 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ def num_tokens_consumed_from_request(
num_tokens += 4 # every message follows <im_start>{role/name}\n{content}<im_end>\n
for key, value in message.items():
try:
- num_tokens += len(encoding.encode(str(value)))
+ num_tokens += len(encoding.encode(str(value), disallowed_special=()))
except TypeError:
logger.warning(
f"Failed to encode value {value} with tiktoken to count tokens. Instead assuming a token for every 4 characters."
Expand All @@ -688,11 +688,13 @@ def num_tokens_consumed_from_request(
else:
prompt = api_specific_request_json["prompt"]
if isinstance(prompt, str): # single prompt
- prompt_tokens = len(encoding.encode(prompt))
+ prompt_tokens = len(encoding.encode(prompt, disallowed_special=()))
num_tokens = prompt_tokens + completion_tokens
return num_tokens
elif isinstance(prompt, list): # multiple prompts
- prompt_tokens = sum([len(encoding.encode(p)) for p in prompt])
+ prompt_tokens = sum(
+ [len(encoding.encode(p, disallowed_special=())) for p in prompt]
+ )
num_tokens = prompt_tokens + completion_tokens * len(prompt)
return num_tokens
else:
Expand All @@ -703,10 +705,10 @@ def num_tokens_consumed_from_request(
elif api_endpoint == "embeddings":
input = api_specific_request_json["input"]
if isinstance(input, str): # single input
- num_tokens = len(encoding.encode(input))
+ num_tokens = len(encoding.encode(input, disallowed_special=()))
return num_tokens
elif isinstance(input, list): # multiple inputs
- num_tokens = sum([len(encoding.encode(i)) for i in input])
+ num_tokens = sum([len(encoding.encode(i, disallowed_special=())) for i in input])
return num_tokens
else:
raise TypeError(
Expand Down

0 comments on commit 87cfc3d

Please sign in to comment.