Skip to content

Commit d786d1b

Browse files
authored
LazyIndexMap for JSON objects (#522)
* adding LazyIndexMap for JSON objects * build map in one step * tests and reverse input order
1 parent 5c90851 commit d786d1b

File tree

7 files changed

+86
-11
lines changed

7 files changed

+86
-11
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ strum_macros = "0.24.3"
3333
serde_json = {version = "1.0.87", features = ["preserve_order"]}
3434
enum_dispatch = "0.3.8"
3535
serde = "1.0.147"
36-
indexmap = "1.9.1"
3736
mimalloc = { version = "0.1.30", default-features = false, optional = true }
3837
speedate = "0.7.0"
3938
ahash = "0.8.0"

src/input/parse_json.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
use std::fmt;
22

3-
use indexmap::IndexMap;
43
use pyo3::prelude::*;
54
use pyo3::types::{PyDict, PyList, PySet};
65
use serde::de::{Deserialize, DeserializeSeed, Error as SerdeError, MapAccess, SeqAccess, Visitor};
76

7+
use crate::lazy_index_map::LazyIndexMap;
8+
89
use crate::build_tools::py_err;
910

1011
#[derive(Copy, Clone, Debug)]
@@ -58,7 +59,7 @@ pub enum JsonInput {
5859
Object(JsonObject),
5960
}
6061
pub type JsonArray = Vec<JsonInput>;
61-
pub type JsonObject = IndexMap<String, JsonInput>;
62+
pub type JsonObject = LazyIndexMap<String, JsonInput>;
6263

6364
impl ToPyObject for JsonInput {
6465
fn to_object(&self, py: Python<'_>) -> PyObject {
@@ -159,15 +160,15 @@ impl<'de> Deserialize<'de> for JsonInput {
159160
{
160161
match visitor.next_key_seed(KeyDeserializer)? {
161162
Some(first_key) => {
162-
let mut values = IndexMap::new();
163+
let mut values = LazyIndexMap::new();
163164

164165
values.insert(first_key, visitor.next_value()?);
165166
while let Some((key, value)) = visitor.next_entry()? {
166167
values.insert(key, value);
167168
}
168169
Ok(JsonInput::Object(values))
169170
}
170-
None => Ok(JsonInput::Object(IndexMap::new())),
171+
None => Ok(JsonInput::Object(LazyIndexMap::new())),
171172
}
172173
}
173174
}

src/input/return_enums.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::borrow::Cow;
2+
use std::slice::Iter as SliceIter;
23

34
use pyo3::intern;
45
use pyo3::prelude::*;
@@ -10,8 +11,6 @@ use pyo3::types::PyFunction;
1011
#[cfg(not(PyPy))]
1112
use pyo3::PyTypeInfo;
1213

13-
use indexmap::map::Iter;
14-
1514
use crate::errors::{py_err_string, ErrorType, InputValue, ValError, ValLineError, ValResult};
1615
use crate::recursion_guard::RecursionGuard;
1716
use crate::validators::{CombinedValidator, Extra, Validator};
@@ -403,7 +402,7 @@ impl<'py> Iterator for AttributesGenericIterator<'py> {
403402
}
404403

405404
pub struct JsonObjectGenericIterator<'py> {
406-
object_iter: Iter<'py, String, JsonInput>,
405+
object_iter: SliceIter<'py, (String, JsonInput)>,
407406
}
408407

409408
impl<'py> JsonObjectGenericIterator<'py> {
@@ -418,7 +417,7 @@ impl<'py> Iterator for JsonObjectGenericIterator<'py> {
418417
type Item = ValResult<'py, (&'py String, &'py JsonInput)>;
419418

420419
fn next(&mut self) -> Option<Self::Item> {
421-
self.object_iter.next().map(Ok)
420+
self.object_iter.next().map(|(key, value)| Ok((key, value)))
422421
}
423422
// size_hint is omitted as it isn't needed
424423
}

src/lazy_index_map.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
use std::borrow::Borrow;
2+
use std::cell::RefCell;
3+
use std::cmp::{Eq, PartialEq};
4+
use std::fmt::Debug;
5+
use std::hash::Hash;
6+
use std::slice::Iter as SliceIter;
7+
8+
use ahash::AHashMap;
9+
10+
#[derive(Debug, Clone, Default)]
11+
pub struct LazyIndexMap<K, V> {
12+
vec: Vec<(K, V)>,
13+
map: RefCell<Option<AHashMap<K, usize>>>,
14+
}
15+
16+
/// Like [IndexMap](https://docs.rs/indexmap/latest/indexmap/) but only builds the lookup map when it's needed.
17+
impl<K, V> LazyIndexMap<K, V>
18+
where
19+
K: Clone + Debug + Eq + Hash,
20+
V: Clone + Debug,
21+
{
22+
pub fn new() -> Self {
23+
Self {
24+
vec: Vec::new(),
25+
map: RefCell::new(None),
26+
}
27+
}
28+
29+
pub fn insert(&mut self, key: K, value: V) {
30+
self.vec.push((key, value))
31+
}
32+
33+
pub fn len(&self) -> usize {
34+
self.vec.len()
35+
}
36+
37+
pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<&V>
38+
where
39+
K: Borrow<Q> + PartialEq<Q>,
40+
Q: Hash + Eq,
41+
{
42+
let mut map = self.map.borrow_mut();
43+
if let Some(map) = map.as_ref() {
44+
map.get(key).map(|&i| &self.vec[i].1)
45+
} else {
46+
let mut new_map = AHashMap::with_capacity(self.vec.len());
47+
let mut value = None;
48+
// reverse here so the last value is the one that's returned
49+
for (index, (k, v)) in self.vec.iter().enumerate().rev() {
50+
if value.is_none() && k == key {
51+
value = Some(v);
52+
}
53+
new_map.insert(k.clone(), index);
54+
}
55+
*map = Some(new_map);
56+
value
57+
}
58+
}
59+
60+
pub fn keys(&self) -> impl Iterator<Item = &K> {
61+
self.vec.iter().map(|(k, _)| k)
62+
}
63+
64+
pub fn iter(&self) -> SliceIter<'_, (K, V)> {
65+
self.vec.iter()
66+
}
67+
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ mod build_context;
1414
mod build_tools;
1515
mod errors;
1616
mod input;
17+
mod lazy_index_map;
1718
mod lookup_key;
1819
mod questions;
1920
mod recursion_guard;

tests/test_json.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import re
23

34
import pytest
@@ -100,7 +101,7 @@ def test_float(input_value, expected):
100101
assert v.validate_json(input_value) == expected
101102

102103

103-
def test_model():
104+
def test_typed_dict():
104105
v = SchemaValidator(
105106
{
106107
'type': 'typed-dict',
@@ -114,6 +115,10 @@ def test_model():
114115
# language=json
115116
input_str = '{"field_a": "abc", "field_b": 1}'
116117
assert v.validate_json(input_str) == {'field_a': 'abc', 'field_b': 1}
118+
# language=json
119+
input_str = '{"field_a": "a", "field_a": "b", "field_b": 1}'
120+
assert v.validate_json(input_str) == {'field_a': 'b', 'field_b': 1}
121+
assert v.validate_json(input_str) == {'field_a': 'b', 'field_b': 1}
117122

118123

119124
def test_float_no_remainder():
@@ -152,6 +157,10 @@ def test_dict():
152157
v = SchemaValidator({'type': 'dict', 'keys_schema': {'type': 'int'}, 'values_schema': {'type': 'int'}})
153158
assert v.validate_json('{"1": 2, "3": 4}') == {1: 2, 3: 4}
154159

160+
# duplicate keys, the last value wins, like with python
161+
assert json.loads('{"1": 1, "1": 2}') == {'1': 2}
162+
assert v.validate_json('{"1": 1, "1": 2}') == {1: 2}
163+
155164

156165
def test_dict_any_value():
157166
v = SchemaValidator({'type': 'dict', 'keys_schema': {'type': 'str'}})

0 commit comments

Comments
 (0)