Skip to content

Commit

Permalink
* fix custom model config
Browse files Browse the repository at this point in the history
* upgrade dependencies
* remove announcement
  • Loading branch information
asofter committed Aug 20, 2024
1 parent 6633da1 commit 0e995d4
Show file tree
Hide file tree
Showing 8 changed files with 22 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ repos:
types: [ python ]

- repo: https://github.com/zricethezav/gitleaks
rev: v8.18.3
rev: v8.18.4
hooks:
- id: gitleaks

Expand Down
1 change: 0 additions & 1 deletion .ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ exclude = ["examples/google_gemini.py"]
ignore = [
'RUF001' # String contains ambiguous `,`
]
ignore-init-module-imports = true
select = [
'A', # flake8-builtins
'ASYNC', # flake8-async
Expand Down
2 changes: 1 addition & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-

### Changed
-
- Upgrade dependencies to the latest versions.

### Removed
-
Expand Down
4 changes: 0 additions & 4 deletions docs/overrides/main.html
Original file line number Diff line number Diff line change
@@ -1,5 +1 @@
{% extends "base.html" %}

{% block announce %}
<strong>Laiyer AI is now part of Protect AI!</strong> We're excited to join forces to continue our mission to build a safer AI powered world. <a href="https://protectai.com/press/protect-ai-acquires-laiyer-ai">Learn More</a>
{% endblock %}
6 changes: 5 additions & 1 deletion llm_guard/input_scanners/prompt_injection.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@
"max_length": 512,
"truncation": True,
},
tokenizer_kwargs={
"use_fast": False,
"token": True,
},
kwargs={"token": True}, # You can also configure with your token.
)

Expand Down Expand Up @@ -121,7 +125,7 @@ def __init__(
self,
*,
model: Model | None = None,
threshold: float = 0.9,
threshold: float = 0.92,
match_type: MatchType | str = MatchType.FULL,
use_onnx: bool = False,
) -> None:
Expand Down
35 changes: 2 additions & 33 deletions llm_guard/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,44 +189,13 @@ def chunk_text(text: str, chunk_size: int) -> list[str]:
return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]


def chunk_text_by_sentences(text: str, max_chunk_size: int) -> list[str]:
    """Split *text* into chunks of whole sentences, each at most *max_chunk_size* chars.

    Sentences are never split; a single sentence longer than *max_chunk_size*
    becomes its own (oversized) chunk.

    Args:
        text: Input text; surrounding whitespace is stripped before tokenizing.
        max_chunk_size: Soft upper bound on a chunk's length, in characters
            (measured on the raw sentences, before the joining spaces are added).

    Returns:
        Non-empty chunks, each a space-joined run of consecutive sentences.
    """
    nltk = lazy_load_dep("nltk")

    # Make sure the sentence-tokenizer models are available. Newer NLTK
    # releases (>= 3.8.2) ship the tokenizer as "punkt_tab"; check both
    # resources, consistent with split_text_by_sentences below.
    try:
        nltk.data.find("tokenizers/punkt")
        nltk.data.find("tokenizers/punkt_tab")
    except LookupError:
        nltk.download("punkt")
        nltk.download("punkt_tab")

    sentences = nltk.sent_tokenize(text.strip())

    chunks: list[str] = []
    chunk: list[str] = []
    chunk_size = 0

    for sentence in sentences:
        sentence_length = len(sentence)
        if chunk_size + sentence_length <= max_chunk_size:
            chunk.append(sentence)
            chunk_size += sentence_length
        else:
            if chunk:  # Flush the current chunk only if it is non-empty.
                chunks.append(" ".join(chunk))
            chunk = [sentence]
            chunk_size = sentence_length

    if chunk:  # Flush the trailing chunk, if any.
        chunks.append(" ".join(chunk))

    return [c for c in chunks if c.strip()]


def split_text_by_sentences(text: str) -> list[str]:
    """Tokenize *text* into sentences with NLTK's sentence tokenizer.

    Leading/trailing whitespace is stripped before tokenizing. The required
    tokenizer models are downloaded on first use if missing.
    """
    nltk = lazy_load_dep("nltk")

    # Both resource names are probed: older NLTK uses "punkt", newer
    # releases use "punkt_tab".
    try:
        nltk.data.find("tokenizers/punkt")
        nltk.data.find("tokenizers/punkt_tab")
    except LookupError:
        nltk.download("punkt")
        nltk.download("punkt_tab")

    stripped = text.strip()
    return nltk.sent_tokenize(stripped)

Expand Down
2 changes: 1 addition & 1 deletion llm_guard_api/config/scanners.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ input_scanners:
model_max_length: 256
- type: PromptInjection
params:
threshold: 0.9
threshold: 0.92
match_type: truncate_head_tail
model_max_length: 256
- type: Regex
Expand Down
24 changes: 12 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ requires-python = ">=3.9,<3.13"

dependencies = [
"bc-detect-secrets==1.5.15",
"faker>=26.0.0,<27",
"faker>=26.0.0,<28",
"fuzzysearch>=0.7,<0.9",
"json-repair>=0.25.2,<0.27",
"nltk>=3.8,<4",
"json-repair>=0.25.2,<0.29",
"nltk>=3.9.1,<4",
"presidio-analyzer==2.2.354",
"presidio-anonymizer==2.2.354",
"regex==2024.7.24",
Expand All @@ -37,14 +37,14 @@ dependencies = [

[project.optional-dependencies]
onnxruntime = [
"optimum[onnxruntime]>=1.20",
"optimum[onnxruntime]>=1.21",
]
onnxruntime-gpu = [
"optimum[onnxruntime-gpu]>=1.20",
"optimum[onnxruntime-gpu]>=1.21",
]
docs-dev = [
"mkdocs>=1.5,<2",
"mkdocs-autorefs==1.0.1",
"mkdocs>=1.6,<2",
"mkdocs-autorefs==1.1.0",
"mkdocs-git-revision-date-localized-plugin>=1.2",
"mkdocs-jupyter>=0.24",
"mkdocs-material>=9.5",
Expand All @@ -54,11 +54,11 @@ docs-dev = [
dev = [
"llm_guard[docs-dev]",
"autoflake>=2,<3",
"pytest>=7.4,<9",
"pytest-cov>=4.1,<6",
"pre-commit>=3.6,<4",
"pyright~=1.1.363",
"ruff>=0.4.1,<0.6.0",
"pytest>=8.0.0,<9",
"pytest-cov>=5.0.0,<6",
"pre-commit>=3.8,<4",
"pyright~=1.1.376",
"ruff>=0.6.1,<0.7.0",
]

[project.urls]
Expand Down

0 comments on commit 0e995d4

Please sign in to comment.