Skip to content

Commit

Permalink
Merge pull request #346 from parea-ai/refactor-update-default-tokens
Browse the repository files at this point in the history
fix: provide default for non-OpenAI models
  • Loading branch information
joschkabraun committed Jan 26, 2024
2 parents 4e48c36 + d1f0163 commit 3e7ed17
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 8 deletions.
9 changes: 2 additions & 7 deletions parea/evals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,7 @@ def get_tokens(model: str, text: str) -> Union[str, list[int]]:
return []
try:
encoding = tiktoken.encoding_for_model(model)
tokens = encoding.encode(text)
except KeyError:
regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
text = text.lower()
text = "".join(char for char in text if char not in set(string.punctuation))
text = re.sub(regex, " ", text)
text = " ".join(text.split())
tokens = text.split()
encoding = tiktoken.get_encoding("cl100k_base")
tokens = encoding.encode(text)
return tokens
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "parea-ai"
packages = [{ include = "parea" }]
version = "0.2.36"
version = "0.2.37"
description = "Parea python sdk"
readme = "README.md"
authors = ["joel-parea-ai <[email protected]>"]
Expand Down

0 comments on commit 3e7ed17

Please sign in to comment.