From 9aca8c709025a34ceec9937e8d08f1b98466409b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Louf?= <remilouf@protonmail.com>
Date: Mon, 2 Dec 2024 15:50:52 +0100
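Subject: [PATCH] Use `outlines-core` to translate JSON Schemas into regexes

Remove the in-tree JSON Schema to regex translation from
`outlines/fsm/json_schema.py` (`build_regex_from_schema`, `to_regex`,
their helpers and the regex constants) and import
`build_regex_from_schema` from `outlines_core.fsm.json_schema` in the
benchmarks, generators, processors and tests instead. These names are
no longer importable from `outlines.fsm.json_schema`.

Test changes that follow from the switch:

- `to_regex` is called without a resolver argument.
- Expected quantifiers are spelled `{0,3}` / `{0,2}` instead of `{,3}` /
  `{,2}`.
- The "enum mix of types" case is commented out and
  `test_transformers_json_function_enum` is marked `xfail`, because
  enums containing objects are not supported by outlines-core yet, see
  https://github.com/dottxt-ai/outlines-core/issues/100

The `time_json_schema_to_regex` benchmark is removed.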
---
 benchmarks/bench_json_schema.py               |   7 +-
 outlines/fsm/json_schema.py                   | 489 +-----------------
 outlines/generate/choice.py                   |   4 +-
 outlines/generate/json.py                     |   7 +-
 outlines/processors/structured.py             |   3 +-
 tests/fsm/test_json_schema.py                 |  88 ++--
 .../generate/test_integration_transformers.py |   1 +
 7 files changed, 56 insertions(+), 543 deletions(-)

diff --git a/benchmarks/bench_json_schema.py b/benchmarks/bench_json_schema.py
index 62d9b3c1d..3a1f72cb6 100644
--- a/benchmarks/bench_json_schema.py
+++ b/benchmarks/bench_json_schema.py
@@ -1,6 +1,7 @@
+from outlines_core.fsm.json_schema import build_regex_from_schema
+
 from outlines.caching import cache_disabled
 from outlines.fsm.guide import RegexGuide
-from outlines.fsm.json_schema import build_regex_from_schema
 
 from .common import setup_tokenizer  # noqa: E402
 
@@ -70,10 +71,6 @@ def setup(self, schema_name):
         self.tokenizer = setup_tokenizer()
         self.schema = schemas[schema_name]
 
-    @cache_disabled()
-    def time_json_schema_to_regex(self, schema_name):
-        build_regex_from_schema(self.schema)
-
     @cache_disabled()
     def time_json_schema_to_fsm(self, schema_name):
         regex = build_regex_from_schema(self.schema)
diff --git a/outlines/fsm/json_schema.py b/outlines/fsm/json_schema.py
index 0bab57923..bae0ad17a 100644
--- a/outlines/fsm/json_schema.py
+++ b/outlines/fsm/json_schema.py
@@ -1,90 +1,10 @@
 import inspect
 import json
-import re
 import warnings
 from enum import Enum
-from typing import Callable, Optional, Tuple, Type, Union
+from typing import Callable, Type, Union
 
-from jsonschema.protocols import Validator
 from pydantic import BaseModel, create_model
-from referencing import Registry, Resource
-from referencing._core import Resolver
-from referencing.jsonschema import DRAFT202012
-
-# allow `\"`, `\\`, or any character which isn't a control sequence
-STRING_INNER = r'([^"\\\x00-\x1F\x7F-\x9F]|\\["\\])'
-STRING = f'"{STRING_INNER}*"'
-
-INTEGER = r"(-)?(0|[1-9][0-9]*)"
-NUMBER = rf"({INTEGER})(\.[0-9]+)?([eE][+-][0-9]+)?"
-BOOLEAN = r"(true|false)"
-NULL = r"null"
-WHITESPACE = r"[ ]?"
-
-type_to_regex = {
-    "string": STRING,
-    "integer": INTEGER,
-    "number": NUMBER,
-    "boolean": BOOLEAN,
-    "null": NULL,
-}
-
-DATE_TIME = r'"(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]{3})?(Z)?"'
-DATE = r'"(?:\d{4})-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1])"'
-TIME = r'"(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\.[0-9]+)?(Z)?"'
-UUID = r'"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"'
-
-format_to_regex = {
-    "uuid": UUID,
-    "date-time": DATE_TIME,
-    "date": DATE,
-    "time": TIME,
-}
-
-
-def build_regex_from_schema(schema: str, whitespace_pattern: Optional[str] = None):
-    """Turn a JSON schema into a regex that matches any JSON object that follows
-    this schema.
-
-    JSON Schema is a declarative language that allows to annotate JSON documents
-    with types and descriptions. These schemas can be generated from any Python
-    datastructure that has type annotation: namedtuples, dataclasses, Pydantic
-    models. And by ensuring that the generation respects the schema we ensure
-    that the output can be parsed into these objects.
-    This function parses the provided schema and builds a generation schedule which
-    mixes deterministic generation (fixed strings), and sampling with constraints.
-
-    Parameters
-    ----------
-    schema
-        A string that represents a JSON Schema.
-    whitespace_pattern
-        Pattern to use for JSON syntactic whitespace (doesn't impact string literals)
-        Example: allow only a single space or newline with `whitespace_pattern=r"[\n ]?"`
-
-    Returns
-    -------
-    A generation schedule. A list of strings that represent the JSON
-    schema's structure and regular expression that define the structure of
-    the fields.
-
-    References
-    ----------
-    .. [0] JSON Schema. https://json-schema.org/
-
-    """
-
-    schema = json.loads(schema)
-    Validator.check_schema(schema)
-
-    # Build reference resolver
-    schema = Resource(contents=schema, specification=DRAFT202012)
-    uri = schema.id() if schema.id() is not None else ""
-    registry = Registry().with_resource(uri=uri, resource=schema)
-    resolver = registry.resolver()
-
-    content = schema.contents
-    return to_regex(resolver, content, whitespace_pattern)
 
 
 def convert_json_schema_to_str(json_schema: Union[dict, str, Type[BaseModel]]) -> str:
@@ -120,413 +40,6 @@ def convert_json_schema_to_str(json_schema: Union[dict, str, Type[BaseModel]]) -
     return schema_str
 
 
-def _get_num_items_pattern(min_items, max_items, whitespace_pattern):
-    # Helper function for arrays and objects
-    min_items = int(min_items or 0)
-    if max_items is None:
-        return rf"{{{max(min_items - 1, 0)},}}"
-    else:
-        max_items = int(max_items)
-        if max_items < 1:
-            return None
-        return rf"{{{max(min_items - 1, 0)},{max_items - 1}}}"
-
-
-def validate_quantifiers(
-    min_bound: Optional[str], max_bound: Optional[str], start_offset: int = 0
-) -> Tuple[str, str]:
-    """
-    Ensures that the bounds of a number are valid. Bounds are used as quantifiers in the regex.
-
-    Parameters
-    ----------
-    min_bound
-        The minimum value that the number can take.
-    max_bound
-        The maximum value that the number can take.
-    start_offset
-        Number of elements that are already present in the regex but still need to be counted.
-        ex: if the regex is already "(-)?(0|[1-9][0-9])", we will always have at least 1 digit, so the start_offset is 1.
-
-    Returns
-    -------
-    min_bound
-        The minimum value that the number can take.
-    max_bound
-        The maximum value that the number can take.
-
-    Raises
-    ------
-    ValueError
-        If the minimum bound is greater than the maximum bound.
-
-    TypeError or ValueError
-        If the minimum bound is not an integer or None.
-        or
-        If the maximum bound is not an integer or None.
-    """
-    min_bound = "" if min_bound is None else str(int(min_bound) - start_offset)
-    max_bound = "" if max_bound is None else str(int(max_bound) - start_offset)
-    if min_bound and max_bound:
-        if int(max_bound) < int(min_bound):
-            raise ValueError("max bound must be greater than or equal to min bound")
-    return min_bound, max_bound
-
-
-def to_regex(
-    resolver: Resolver, instance: dict, whitespace_pattern: Optional[str] = None
-):
-    """Translate a JSON Schema instance into a regex that validates the schema.
-
-    Note
-    ----
-    Many features of JSON schema are missing:
-    - Handle `additionalProperties` keyword
-    - Handle types defined as a list
-    - Handle constraints on numbers
-    - Handle special patterns: `date`, `uri`, etc.
-
-    This does not support recursive definitions.
-
-    Parameters
-    ----------
-    resolver
-        An object that resolves references to other instances within a schema
-    instance
-        The instance to translate
-    whitespace_pattern
-        Pattern to use for JSON syntactic whitespace (doesn't impact string literals)
-        Example: allow only a single space or newline with `whitespace_pattern=r"[\n ]?"`
-    """
-
-    # set whitespace pattern
-    if whitespace_pattern is None:
-        whitespace_pattern = WHITESPACE
-
-    if instance == {}:
-        # JSON Schema Spec: Empty object means unconstrained, any json type is legal
-        types = [
-            {"type": "boolean"},
-            {"type": "null"},
-            {"type": "number"},
-            {"type": "integer"},
-            {"type": "string"},
-            {"type": "array"},
-            {"type": "object"},
-        ]
-        regexes = [to_regex(resolver, t, whitespace_pattern) for t in types]
-        regexes = [rf"({r})" for r in regexes]
-        return rf"{'|'.join(regexes)}"
-
-    elif "properties" in instance:
-        regex = ""
-        regex += r"\{"
-        properties = instance["properties"]
-        required_properties = instance.get("required", [])
-        is_required = [item in required_properties for item in properties]
-        # If at least one property is required, we include the one in the lastest position
-        # without any comma.
-        # For each property before it (optional or required), we add with a comma after the property.
-        # For each property after it (optional), we add with a comma before the property.
-        if any(is_required):
-            last_required_pos = max([i for i, value in enumerate(is_required) if value])
-            for i, (name, value) in enumerate(properties.items()):
-                subregex = f'{whitespace_pattern}"{re.escape(name)}"{whitespace_pattern}:{whitespace_pattern}'
-                subregex += to_regex(resolver, value, whitespace_pattern)
-                if i < last_required_pos:
-                    subregex = f"{subregex}{whitespace_pattern},"
-                elif i > last_required_pos:
-                    subregex = f"{whitespace_pattern},{subregex}"
-                regex += subregex if is_required[i] else f"({subregex})?"
-        # If no property is required, we have to create a possible pattern for each property in which
-        # it's the last one necessarilly present. Then, we add the others as optional before and after
-        # following the same strategy as described above.
-        # The whole block is made optional to allow the case in which no property is returned.
-        else:
-            property_subregexes = []
-            for i, (name, value) in enumerate(properties.items()):
-                subregex = f'{whitespace_pattern}"{name}"{whitespace_pattern}:{whitespace_pattern}'
-                subregex += to_regex(resolver, value, whitespace_pattern)
-                property_subregexes.append(subregex)
-            possible_patterns = []
-            for i in range(len(property_subregexes)):
-                pattern = ""
-                for subregex in property_subregexes[:i]:
-                    pattern += f"({subregex}{whitespace_pattern},)?"
-                pattern += property_subregexes[i]
-                for subregex in property_subregexes[i + 1 :]:
-                    pattern += f"({whitespace_pattern},{subregex})?"
-                possible_patterns.append(pattern)
-            regex += f"({'|'.join(possible_patterns)})?"
-
-        regex += f"{whitespace_pattern}" + r"\}"
-
-        return regex
-
-    # To validate against allOf, the given data must be valid against all of the
-    # given subschemas.
-    elif "allOf" in instance:
-        subregexes = [
-            to_regex(resolver, t, whitespace_pattern) for t in instance["allOf"]
-        ]
-        subregexes_str = [f"{subregex}" for subregex in subregexes]
-        return rf"({''.join(subregexes_str)})"
-
-    # To validate against `anyOf`, the given data must be valid against
-    # any (one or more) of the given subschemas.
-    elif "anyOf" in instance:
-        subregexes = [
-            to_regex(resolver, t, whitespace_pattern) for t in instance["anyOf"]
-        ]
-        return rf"({'|'.join(subregexes)})"
-
-    # To validate against oneOf, the given data must be valid against exactly
-    # one of the given subschemas.
-    elif "oneOf" in instance:
-        subregexes = [
-            to_regex(resolver, t, whitespace_pattern) for t in instance["oneOf"]
-        ]
-
-        xor_patterns = [f"(?:{subregex})" for subregex in subregexes]
-
-        return rf"({'|'.join(xor_patterns)})"
-
-    # Create pattern for Tuples, per JSON Schema spec, `prefixItems` determines types at each idx
-    elif "prefixItems" in instance:
-        element_patterns = [
-            to_regex(resolver, t, whitespace_pattern) for t in instance["prefixItems"]
-        ]
-        comma_split_pattern = rf"{whitespace_pattern},{whitespace_pattern}"
-        tuple_inner = comma_split_pattern.join(element_patterns)
-        return rf"\[{whitespace_pattern}{tuple_inner}{whitespace_pattern}\]"
-
-    # The enum keyword is used to restrict a value to a fixed set of values. It
-    # must be an array with at least one element, where each element is unique.
-    elif "enum" in instance:
-        choices = []
-        for choice in instance["enum"]:
-            if type(choice) in [int, float, bool, type(None), str]:
-                choices.append(re.escape(json.dumps(choice)))
-            elif isinstance(choice, dict):
-                choices.append(to_regex(resolver, choice, whitespace_pattern))
-            else:
-                raise TypeError(f"Unsupported data type in enum: {type(choice)}")
-        return f"({'|'.join(choices)})"
-
-    elif "const" in instance:
-        const = instance["const"]
-        if type(const) in [int, float, bool, type(None), str]:
-            const = re.escape(json.dumps(const))
-        else:
-            raise TypeError(f"Unsupported data type in const: {type(const)}")
-        return const
-
-    elif "$ref" in instance:
-        path = f"{instance['$ref']}"
-        instance = resolver.lookup(path).contents
-        return to_regex(resolver, instance, whitespace_pattern)
-
-    # The type keyword may either be a string or an array:
-    # - If it's a string, it is the name of one of the basic types.
-    # - If it is an array, it must be an array of strings, where each string is
-    # the name of one of the basic types, and each element is unique. In this
-    # case, the JSON snippet is valid if it matches any of the given types.
-    elif "type" in instance:
-        instance_type = instance["type"]
-        if instance_type == "string":
-            if "maxLength" in instance or "minLength" in instance:
-                max_items = instance.get("maxLength", "")
-                min_items = instance.get("minLength", "")
-                try:
-                    if int(max_items) < int(min_items):
-                        raise ValueError(
-                            "maxLength must be greater than or equal to minLength"
-                        )  # FIXME this raises an error but is caught right away by the except (meant for int("") I assume)
-                except ValueError:
-                    pass
-                return f'"{STRING_INNER}{{{min_items},{max_items}}}"'
-            elif "pattern" in instance:
-                pattern = instance["pattern"]
-                if pattern[0] == "^" and pattern[-1] == "$":
-                    return rf'("{pattern[1:-1]}")'
-                else:
-                    return rf'("{pattern}")'
-            elif "format" in instance:
-                format = instance["format"]
-                if format == "date-time":
-                    return format_to_regex["date-time"]
-                elif format == "uuid":
-                    return format_to_regex["uuid"]
-                elif format == "date":
-                    return format_to_regex["date"]
-                elif format == "time":
-                    return format_to_regex["time"]
-                else:
-                    raise NotImplementedError(
-                        f"Format {format} is not supported by Outlines"
-                    )
-            else:
-                return type_to_regex["string"]
-
-        elif instance_type == "number":
-            bounds = {
-                "minDigitsInteger",
-                "maxDigitsInteger",
-                "minDigitsFraction",
-                "maxDigitsFraction",
-                "minDigitsExponent",
-                "maxDigitsExponent",
-            }
-            if bounds.intersection(set(instance.keys())):
-                min_digits_integer, max_digits_integer = validate_quantifiers(
-                    instance.get("minDigitsInteger"),
-                    instance.get("maxDigitsInteger"),
-                    start_offset=1,
-                )
-                min_digits_fraction, max_digits_fraction = validate_quantifiers(
-                    instance.get("minDigitsFraction"), instance.get("maxDigitsFraction")
-                )
-                min_digits_exponent, max_digits_exponent = validate_quantifiers(
-                    instance.get("minDigitsExponent"), instance.get("maxDigitsExponent")
-                )
-                integers_quantifier = (
-                    f"{{{min_digits_integer},{max_digits_integer}}}"
-                    if min_digits_integer or max_digits_integer
-                    else "*"
-                )
-                fraction_quantifier = (
-                    f"{{{min_digits_fraction},{max_digits_fraction}}}"
-                    if min_digits_fraction or max_digits_fraction
-                    else "+"
-                )
-                exponent_quantifier = (
-                    f"{{{min_digits_exponent},{max_digits_exponent}}}"
-                    if min_digits_exponent or max_digits_exponent
-                    else "+"
-                )
-                return rf"((-)?(0|[1-9][0-9]{integers_quantifier}))(\.[0-9]{fraction_quantifier})?([eE][+-][0-9]{exponent_quantifier})?"
-            return type_to_regex["number"]
-
-        elif instance_type == "integer":
-            if "minDigits" in instance or "maxDigits" in instance:
-                min_digits, max_digits = validate_quantifiers(
-                    instance.get("minDigits"), instance.get("maxDigits"), start_offset=1
-                )
-                return rf"(-)?(0|[1-9][0-9]{{{min_digits},{max_digits}}})"
-            return type_to_regex["integer"]
-
-        elif instance_type == "array":
-            num_repeats = _get_num_items_pattern(
-                instance.get("minItems"), instance.get("maxItems"), whitespace_pattern
-            )
-            if num_repeats is None:
-                return rf"\[{whitespace_pattern}\]"
-
-            allow_empty = "?" if int(instance.get("minItems", 0)) == 0 else ""
-
-            if "items" in instance:
-                items_regex = to_regex(resolver, instance["items"], whitespace_pattern)
-                return rf"\[{whitespace_pattern}(({items_regex})(,{whitespace_pattern}({items_regex})){num_repeats}){allow_empty}{whitespace_pattern}\]"
-            else:
-                # Here we need to make the choice to exclude generating list of objects
-                # if the specification of the object is not given, even though a JSON
-                # object that contains an object here would be valid under the specification.
-                legal_types = [
-                    {"type": "boolean"},
-                    {"type": "null"},
-                    {"type": "number"},
-                    {"type": "integer"},
-                    {"type": "string"},
-                ]
-                depth = instance.get("depth", 2)
-                if depth > 0:
-                    legal_types.append({"type": "object", "depth": depth - 1})
-                    legal_types.append({"type": "array", "depth": depth - 1})
-
-                regexes = [
-                    to_regex(resolver, t, whitespace_pattern) for t in legal_types
-                ]
-                return rf"\[{whitespace_pattern}({'|'.join(regexes)})(,{whitespace_pattern}({'|'.join(regexes)})){num_repeats}{allow_empty}{whitespace_pattern}\]"
-
-        elif instance_type == "object":
-            # pattern for json object with values defined by instance["additionalProperties"]
-            # enforces value type constraints recursively, "minProperties", and "maxProperties"
-            # doesn't enforce "required", "dependencies", "propertyNames" "any/all/on Of"
-            num_repeats = _get_num_items_pattern(
-                instance.get("minProperties"),
-                instance.get("maxProperties"),
-                whitespace_pattern,
-            )
-            if num_repeats is None:
-                return rf"\{{{whitespace_pattern}\}}"
-
-            allow_empty = "?" if int(instance.get("minProperties", 0)) == 0 else ""
-
-            additional_properties = instance.get("additionalProperties")
-
-            if additional_properties is None or additional_properties is True:
-                # JSON Schema behavior: If the additionalProperties of an object is
-                # unset or True, it is unconstrained object.
-                # We handle this by setting additionalProperties to anyOf: {all types}
-
-                legal_types = [
-                    {"type": "string"},
-                    {"type": "number"},
-                    {"type": "boolean"},
-                    {"type": "null"},
-                ]
-
-                # We set the object depth to 2 to keep the expression finite, but the "depth"
-                # key is not a true component of the JSON Schema specification.
-                depth = instance.get("depth", 2)
-                if depth > 0:
-                    legal_types.append({"type": "object", "depth": depth - 1})
-                    legal_types.append({"type": "array", "depth": depth - 1})
-                additional_properties = {"anyOf": legal_types}
-
-            value_pattern = to_regex(
-                resolver, additional_properties, whitespace_pattern
-            )
-            key_value_pattern = (
-                f"{STRING}{whitespace_pattern}:{whitespace_pattern}{value_pattern}"
-            )
-            key_value_successor_pattern = (
-                f"{whitespace_pattern},{whitespace_pattern}{key_value_pattern}"
-            )
-            multiple_key_value_pattern = f"({key_value_pattern}({key_value_successor_pattern}){num_repeats}){allow_empty}"
-
-            return (
-                r"\{"
-                + whitespace_pattern
-                + multiple_key_value_pattern
-                + whitespace_pattern
-                + r"\}"
-            )
-
-        elif instance_type == "boolean":
-            return type_to_regex["boolean"]
-
-        elif instance_type == "null":
-            return type_to_regex["null"]
-
-        elif isinstance(instance_type, list):
-            # Here we need to make the choice to exclude generating an object
-            # if the specification of the object is not give, even though a JSON
-            # object that contains an object here would be valid under the specification.
-            regexes = [
-                to_regex(resolver, {"type": t}, whitespace_pattern)
-                for t in instance_type
-                if t != "object"
-            ]
-            return rf"({'|'.join(regexes)})"
-
-    raise NotImplementedError(
-        f"""Could not translate the instance {instance} to a
-    regular expression. Make sure it is valid to the JSON Schema specification. If
-    it is, please open an issue on the Outlines repository"""
-    )
-
-
 def get_schema_from_signature(fn: Callable) -> dict:
     """Turn a function signature into a JSON schema.
 
diff --git a/outlines/generate/choice.py b/outlines/generate/choice.py
index 75fc71271..afb998f52 100644
--- a/outlines/generate/choice.py
+++ b/outlines/generate/choice.py
@@ -4,7 +4,9 @@
 from functools import singledispatch
 from typing import Callable, List, Union
 
-from outlines.fsm.json_schema import build_regex_from_schema, get_schema_from_enum
+from outlines_core.fsm.json_schema import build_regex_from_schema
+
+from outlines.fsm.json_schema import get_schema_from_enum
 from outlines.generate.api import SequenceGeneratorAdapter
 from outlines.models import OpenAI
 from outlines.samplers import Sampler, multinomial
diff --git a/outlines/generate/json.py b/outlines/generate/json.py
index 703447958..d098d920d 100644
--- a/outlines/generate/json.py
+++ b/outlines/generate/json.py
@@ -3,13 +3,10 @@
 from functools import singledispatch
 from typing import Callable, Optional, Union
 
+from outlines_core.fsm.json_schema import build_regex_from_schema
 from pydantic import BaseModel
 
-from outlines.fsm.json_schema import (
-    build_regex_from_schema,
-    get_schema_from_enum,
-    get_schema_from_signature,
-)
+from outlines.fsm.json_schema import get_schema_from_enum, get_schema_from_signature
 from outlines.generate.api import SequenceGeneratorAdapter
 from outlines.models import OpenAI
 from outlines.samplers import Sampler, multinomial
diff --git a/outlines/processors/structured.py b/outlines/processors/structured.py
index d2bc15f77..64892b73f 100644
--- a/outlines/processors/structured.py
+++ b/outlines/processors/structured.py
@@ -27,10 +27,11 @@
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union
 
 import torch
+from outlines_core.fsm.json_schema import build_regex_from_schema
 from pydantic import BaseModel
 
 from outlines.fsm.guide import CFGGuide, Guide, RegexGuide
-from outlines.fsm.json_schema import build_regex_from_schema, convert_json_schema_to_str
+from outlines.fsm.json_schema import convert_json_schema_to_str
 
 from .base_logits_processor import OutlinesLogitsProcessor
 
diff --git a/tests/fsm/test_json_schema.py b/tests/fsm/test_json_schema.py
index 6f0b59c50..9cfc110bb 100644
--- a/tests/fsm/test_json_schema.py
+++ b/tests/fsm/test_json_schema.py
@@ -7,9 +7,7 @@
 
 import interegular
 import pytest
-from pydantic import BaseModel, Field, constr
-
-from outlines.fsm.json_schema import (
+from outlines_core.fsm.json_schema import (
     BOOLEAN,
     DATE,
     DATE_TIME,
@@ -22,10 +20,11 @@
     UUID,
     WHITESPACE,
     build_regex_from_schema,
-    get_schema_from_enum,
-    get_schema_from_signature,
     to_regex,
 )
+from pydantic import BaseModel, Field, constr
+
+from outlines.fsm.json_schema import get_schema_from_enum, get_schema_from_signature
 
 
 def test_function_basic():
@@ -75,7 +74,7 @@ class User(BaseModel):
 )
 def test_match_integer(pattern, does_match):
     step = {"title": "Foo", "type": "integer"}
-    regex = to_regex(None, step)
+    regex = to_regex(step)
     assert regex == INTEGER
 
     value = pattern["integer"]
@@ -102,7 +101,7 @@ def test_match_integer(pattern, does_match):
 )
 def test_match_number(pattern, does_match):
     step = {"title": "Foo", "type": "number"}
-    regex = to_regex(None, step)
+    regex = to_regex(step)
     assert regex == NUMBER
 
     value = pattern["number"]
@@ -137,7 +136,7 @@ def test_match_number(pattern, does_match):
         # String with maximum length
         (
             {"title": "Foo", "type": "string", "maxLength": 3},
-            f'"{STRING_INNER}{{,3}}"',
+            f'"{STRING_INNER}{{0,3}}"',
             [('"ab"', True), ('"a""', False), ('"abcd"', False)],
         ),
         # String with minimum length
@@ -240,40 +239,43 @@ def test_match_number(pattern, does_match):
             [("0", True), ("1", True), ("a", False)],
         ),
         # Enum mix of types
-        (
-            {
-                "title": "Foo",
-                "enum": [
-                    6,
-                    5.3,
-                    "potato",
-                    True,
-                    None,
-                    {
-                        "properties": {
-                            "a": {"title": "A", "type": "number"},
-                            "b": {"title": "B", "type": "number"},
-                        },
-                        "required": ["a", "b"],
-                        "title": "add",
-                        "type": "object",
-                    },
-                ],
-            },
-            r'(6|5\.3|"potato"|true|null|\{[ ]?"a"[ ]?:[ ]?((-)?(0|[1-9][0-9]*))(\.[0-9]+)?([eE][+-][0-9]+)?[ ]?,[ ]?"b"[ ]?:[ ]?((-)?(0|[1-9][0-9]*))(\.[0-9]+)?([eE][+-][0-9]+)?[ ]?\})',
-            [
-                ("6", True),
-                ("5.3", True),
-                ('"potato"', True),
-                ("true", True),
-                ("null", True),
-                ("523", False),
-                ("True", False),
-                ("None", False),
-                ('{"a": -1.0, "b": 1.1}', True),
-                ('{"a": "a", "b": 1.1}', False),
-            ],
-        ),
+        #
+        # Enums of objects are not supported by outlines-core yet,
+        # see https://github.com/dottxt-ai/outlines-core/issues/100
+        # (
+        #     {
+        #         "title": "Foo",
+        #         "enum": [
+        #             6,
+        #             5.3,
+        #             "potato",
+        #             True,
+        #             None,
+        #             {
+        #                 "properties": {
+        #                     "a": {"title": "A", "type": "number"},
+        #                     "b": {"title": "B", "type": "number"},
+        #                 },
+        #                 "required": ["a", "b"],
+        #                 "title": "add",
+        #                 "type": "object",
+        #             },
+        #         ],
+        #     },
+        #     r'(6|5\.3|"potato"|true|null|\{[ ]?"a"[ ]?:[ ]?((-)?(0|[1-9][0-9]*))(\.[0-9]+)?([eE][+-][0-9]+)?[ ]?,[ ]?"b"[ ]?:[ ]?((-)?(0|[1-9][0-9]*))(\.[0-9]+)?([eE][+-][0-9]+)?[ ]?\})',
+        #     [
+        #         ("6", True),
+        #         ("5.3", True),
+        #         ('"potato"', True),
+        #         ("true", True),
+        #         ("null", True),
+        #         ("523", False),
+        #         ("True", False),
+        #         ("None", False),
+        #         ('{"a": -1.0, "b": 1.1}', True),
+        #         ('{"a": "a", "b": 1.1}', False),
+        #     ],
+        # ),
         # integer
         (
             {
@@ -308,7 +310,7 @@ def test_match_number(pattern, does_match):
                 },
                 "required": ["count"],
             },
-            '\\{[ ]?"count"[ ]?:[ ]?(-)?(0|[1-9][0-9]{,2})[ ]?\\}',
+            '\\{[ ]?"count"[ ]?:[ ]?(-)?(0|[1-9][0-9]{0,2})[ ]?\\}',
             [('{ "count": 100 }', True), ('{ "count": 1000 }', False)],
         ),
         # integer with minimum and maximum digits
diff --git a/tests/generate/test_integration_transformers.py b/tests/generate/test_integration_transformers.py
index 92c5d789c..7b7973d23 100644
--- a/tests/generate/test_integration_transformers.py
+++ b/tests/generate/test_integration_transformers.py
@@ -363,6 +363,7 @@ def mul(c: float, d: float) -> float:
     return c * d
 
 
+@pytest.mark.xfail(reason="Enum of objects are not supported in outlines-core")
 def test_transformers_json_function_enum(model):
     prompt = "Output some JSON "