Skip to content

Commit

Permalink
* fix custom model config
Browse files Browse the repository at this point in the history
* upgrade dependencies
* remove announcement
  • Loading branch information
asofter committed Aug 20, 2024
1 parent 6633da1 commit 0e995d4
Show file tree
Hide file tree
Showing 8 changed files with 22 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ repos:
types: [ python ]

- repo: https://github.com/zricethezav/gitleaks
rev: v8.18.3
rev: v8.18.4
hooks:
- id: gitleaks

Expand Down
1 change: 0 additions & 1 deletion .ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ exclude = ["examples/google_gemini.py"]
ignore = [
'RUF001' # String contains ambiguous `,`
]
ignore-init-module-imports = true
select = [
'A', # flake8-builtins
'ASYNC', # flake8-async
Expand Down
2 changes: 1 addition & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-

### Changed
-
- Upgrade dependencies to the latest versions.

### Removed
-
Expand Down
4 changes: 0 additions & 4 deletions docs/overrides/main.html
Original file line number Diff line number Diff line change
@@ -1,5 +1 @@
{% extends "base.html" %}

{% block announce %}
<strong>Laiyer AI is now part of Protect AI!</strong> We're excited to join forces to continue our mission to build a safer AI powered world. <a href="https://protectai.com/press/protect-ai-acquires-laiyer-ai">Learn More</a>
{% endblock %}
6 changes: 5 additions & 1 deletion llm_guard/input_scanners/prompt_injection.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@
"max_length": 512,
"truncation": True,
},
tokenizer_kwargs={
"use_fast": False,
"token": True,
},
kwargs={"token": True}, # You can also configure with your token.
)

Expand Down Expand Up @@ -121,7 +125,7 @@ def __init__(
self,
*,
model: Model | None = None,
threshold: float = 0.9,
threshold: float = 0.92,
match_type: MatchType | str = MatchType.FULL,
use_onnx: bool = False,
) -> None:
Expand Down
35 changes: 2 additions & 33 deletions llm_guard/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,44 +189,13 @@ def chunk_text(text: str, chunk_size: int) -> list[str]:
return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]


def chunk_text_by_sentences(text: str, max_chunk_size: int) -> list[str]:
    """Split *text* into chunks of whole sentences, each at most *max_chunk_size* chars.

    Sentences are never split; a single sentence longer than *max_chunk_size*
    becomes its own (oversized) chunk.

    Args:
        text: Input text; surrounding whitespace is stripped before tokenizing.
        max_chunk_size: Soft upper bound on a chunk's length, in characters
            (measured on the raw sentences, before the joining spaces are added).

    Returns:
        Non-empty chunks, each a space-joined run of consecutive sentences.
    """
    nltk = lazy_load_dep("nltk")

    # Make sure the sentence-tokenizer models are available. Newer NLTK
    # releases (>= 3.8.2) ship the tokenizer as "punkt_tab"; check both
    # resources, consistent with split_text_by_sentences below.
    try:
        nltk.data.find("tokenizers/punkt")
        nltk.data.find("tokenizers/punkt_tab")
    except LookupError:
        nltk.download("punkt")
        nltk.download("punkt_tab")

    sentences = nltk.sent_tokenize(text.strip())

    chunks: list[str] = []
    chunk: list[str] = []
    chunk_size = 0

    for sentence in sentences:
        sentence_length = len(sentence)
        if chunk_size + sentence_length <= max_chunk_size:
            chunk.append(sentence)
            chunk_size += sentence_length
        else:
            if chunk:  # Flush the current chunk only if it is non-empty.
                chunks.append(" ".join(chunk))
            chunk = [sentence]
            chunk_size = sentence_length

    if chunk:  # Flush the trailing chunk, if any.
        chunks.append(" ".join(chunk))

    return [c for c in chunks if c.strip()]


def split_text_by_sentences(text: str) -> list[str]:
    """Tokenize *text* into sentences with NLTK's sentence tokenizer.

    Leading/trailing whitespace is stripped before tokenizing. The required
    tokenizer models are downloaded on first use if missing.
    """
    nltk = lazy_load_dep("nltk")

    # Both resource names are probed: older NLTK uses "punkt", newer
    # releases use "punkt_tab".
    try:
        nltk.data.find("tokenizers/punkt")
        nltk.data.find("tokenizers/punkt_tab")
    except LookupError:
        nltk.download("punkt")
        nltk.download("punkt_tab")

    stripped = text.strip()
    return nltk.sent_tokenize(stripped)

Expand Down
2 changes: 1 addition & 1 deletion llm_guard_api/config/scanners.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ input_scanners:
model_max_length: 256
- type: PromptInjection
params:
threshold: 0.9
threshold: 0.92
match_type: truncate_head_tail
model_max_length: 256
- type: Regex
Expand Down
24 changes: 12 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ requires-python = ">=3.9,<3.13"

dependencies = [
"bc-detect-secrets==1.5.15",
"faker>=26.0.0,<27",
"faker>=26.0.0,<28",
"fuzzysearch>=0.7,<0.9",
"json-repair>=0.25.2,<0.27",
"nltk>=3.8,<4",
"json-repair>=0.25.2,<0.29",
"nltk>=3.9.1,<4",
"presidio-analyzer==2.2.354",
"presidio-anonymizer==2.2.354",
"regex==2024.7.24",
Expand All @@ -37,14 +37,14 @@ dependencies = [

[project.optional-dependencies]
onnxruntime = [
"optimum[onnxruntime]>=1.20",
"optimum[onnxruntime]>=1.21",
]
onnxruntime-gpu = [
"optimum[onnxruntime-gpu]>=1.20",
"optimum[onnxruntime-gpu]>=1.21",
]
docs-dev = [
"mkdocs>=1.5,<2",
"mkdocs-autorefs==1.0.1",
"mkdocs>=1.6,<2",
"mkdocs-autorefs==1.1.0",
"mkdocs-git-revision-date-localized-plugin>=1.2",
"mkdocs-jupyter>=0.24",
"mkdocs-material>=9.5",
Expand All @@ -54,11 +54,11 @@ docs-dev = [
dev = [
"llm_guard[docs-dev]",
"autoflake>=2,<3",
"pytest>=7.4,<9",
"pytest-cov>=4.1,<6",
"pre-commit>=3.6,<4",
"pyright~=1.1.363",
"ruff>=0.4.1,<0.6.0",
"pytest>=8.0.0,<9",
"pytest-cov>=5.0.0,<6",
"pre-commit>=3.8,<4",
"pyright~=1.1.376",
"ruff>=0.6.1,<0.7.0",
]

[project.urls]
Expand Down

0 comments on commit 0e995d4

Please sign in to comment.