From 6f057eab86169ad4b6931e9240a047f39eb8413f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Thu, 11 Jul 2024 14:04:18 -0400 Subject: [PATCH] Improve cycle checks in `JSON.stringify` (#700) * Improve cycle checks in `JSON.stringify` Prior to this commit, the following program will erroneously fail, with a `circular dependency` error: const template = { foo: "foo", bar: "bar", }; const x = []; x.push(template); x.push(template); x.push(template); x.push(template); x.push(template); x.push(template); console.log(JSON.stringify(x)); The logic for testing for circular dependencies didn't correctly manage the stack state and therefore pushing the same object multiple times to an array resulted in a false-positive cycle. Cycles can only really happen in Arrays and Objects. Thsi commit solidifies the approach to checking for cycles, by scoping cycle checks and cycle stack management to the `MapAccess` and `SeqAccess` structs. * Proper version and CHANGELOG entry * Fix typos --- Cargo.lock | 2 +- Cargo.toml | 2 +- crates/javy/CHANGELOG.md | 5 + crates/javy/Cargo.toml | 2 +- crates/javy/src/serde/de.rs | 142 +++++++++++++++++++-------- crates/javy/tests/misc.rs | 19 ++++ crates/javy/tests/stringify_cycle.js | 15 +++ 7 files changed, 144 insertions(+), 43 deletions(-) create mode 100644 crates/javy/tests/stringify_cycle.js diff --git a/Cargo.lock b/Cargo.lock index bce5c1bd..88341a78 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1563,7 +1563,7 @@ dependencies = [ [[package]] name = "javy" -version = "3.0.0" +version = "3.0.1-alpha.1" dependencies = [ "anyhow", "bitflags", diff --git a/Cargo.toml b/Cargo.toml index dabd4d33..216069ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ anyhow = "1.0" once_cell = "1.19" bitflags = "2.6.0" javy-config = { path = "crates/config" } -javy = { path = "crates/javy", version = "3.0.0" } +javy = { path = "crates/javy", version = "3.0.1-alpha.1" } tempfile = "3.10.1" uuid = { version = "1.9", features = ["v4"] } diff --git a/crates/javy/CHANGELOG.md b/crates/javy/CHANGELOG.md index 5926f4bf..d0028277 100644 --- a/crates/javy/CHANGELOG.md +++ b/crates/javy/CHANGELOG.md @@ -8,6 +8,11 @@ Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Fixed + +- Circular dependency checks for the custom, SIMD-based, `JSON.stringify` + (https://github.com/bytecodealliance/javy/pull/700) + ## [3.0.0] - 2024-06-12 ### Changed diff --git a/crates/javy/Cargo.toml b/crates/javy/Cargo.toml index a8257dcb..3bbe2e94 100644 --- a/crates/javy/Cargo.toml +++ b/crates/javy/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "javy" -version = "3.0.0" +version = "3.0.1-alpha.1" authors.workspace = true edition.workspace = true license.workspace = true diff --git a/crates/javy/src/serde/de.rs b/crates/javy/src/serde/de.rs index f1f84c75..1e51831a 100644 --- a/crates/javy/src/serde/de.rs +++ b/crates/javy/src/serde/de.rs @@ -2,12 +2,12 @@ use crate::quickjs::{ function::This, object::ObjectIter, qjs::{JS_GetClassID, JS_GetProperty}, - Array, Exception, Filter, String as JSString, Value, + Array, Exception, Filter, Object, String as JSString, Value, }; use crate::serde::err::{Error, Result}; use crate::serde::{MAX_SAFE_INTEGER, MIN_SAFE_INTEGER}; use crate::to_string_lossy; -use anyhow::anyhow; +use anyhow::{anyhow, bail}; use rquickjs::{atom::PredefinedAtom, Function, Null}; use serde::de::{self, Error as SerError}; use serde::forward_to_deserialize_any; @@ -61,6 +61,7 @@ impl<'de> From> for Deserializer<'de> { } } } + impl<'js> Deserializer<'js> { fn deserialize_number<'de, V>(&mut self, visitor: V) -> Result where @@ -97,6 +98,15 @@ impl<'js> Deserializer<'js> { unreachable!() } + /// Pops the last visited value present in the stack. + fn pop_visited(&mut self) -> Result> { + let v = self + .stack + .pop() + .ok_or_else(|| anyhow!("No entries found in the deserializer stack"))?; + Ok(v) + } + /// When stringifying, circular dependencies are not allowed. This function /// checks the current value stack to ensure that if the same value (tag and /// bits) is found again a proper error is raised. @@ -176,39 +186,15 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { } if self.value.is_array() { - self.check_cycles() - .map(|_| self.stack.push(self.value.clone()))?; let arr = self.value.as_array().unwrap().clone(); - // Retrieve the `length` property from the object itself rather than - // using the bindings `Array::len` given that according to the spec - // it's fine to return any value, not just a number from the - // `length` property. - let value: Value = arr.as_object().get(PredefinedAtom::Length)?; - let length: usize = if value.is_number() { - value.as_number().unwrap() as usize - } else { - let value_of: Function = value - .as_object() - .expect("length to be an object") - .get(PredefinedAtom::ValueOf)?; - value_of.call(())? - }; - let seq_access = SeqAccess { - de: self, - length, - seq: arr, - index: 0, - }; + + let seq_access = SeqAccess::new(self, arr)?; let result = visitor.visit_seq(seq_access); - self.stack.pop(); return result; } if self.value.is_object() { - ensure_supported(&self.value).and_then(|_| { - self.check_cycles() - .map(|_| self.stack.push(self.value.clone())) - })?; + ensure_supported(&self.value)?; if let Some(f) = get_to_json(&self.value) { let v: Value = f.call((This(self.value.clone()),))?; @@ -221,18 +207,8 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { return self.deserialize_any(visitor); } - let filter = Filter::new().enum_only().string(); - let obj = self.value.as_object().unwrap(); - let properties: ObjectIter<'_, _, Value<'_>> = - obj.own_props::, Value<'_>>(filter); - self.stack.push(self.value.clone()); - let map_access = MapAccess { - de: self, - properties, - }; - + let map_access = MapAccess::new(self, self.value.clone().into_object().unwrap())?; let result = visitor.visit_map(map_access); - self.stack.pop(); return result; } @@ -293,9 +269,42 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { } } +/// A helper struct for deserializing objects. struct MapAccess<'a, 'de: 'a> { + /// The deserializer. de: &'a mut Deserializer<'de>, + /// The object properties. properties: ObjectIter<'de, Value<'de>, Value<'de>>, + /// The current object. + obj: Object<'de>, +} + +impl<'a, 'de> MapAccess<'a, 'de> { + fn new(de: &'a mut Deserializer<'de>, obj: Object<'de>) -> Result { + let filter = Filter::new().enum_only().string(); + let properties: ObjectIter<'_, _, Value<'_>> = + obj.own_props::, Value<'_>>(filter); + + let val = obj.clone().into_value(); + de.stack.push(val.clone()); + + Ok(Self { + de, + properties, + obj, + }) + } + + /// Pops the top level value representing this sequence. + /// Errors if a different value is popped. + fn pop(&mut self) -> anyhow::Result<()> { + let v = self.de.pop_visited()?; + if v != self.obj.clone().into_value() { + bail!("Popped a mismatched value. Expected the top level sequence value"); + } + + Ok(()) + } } impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { @@ -344,6 +353,7 @@ impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { return seed.deserialize(&mut *self.de).map(Some); } else { + self.pop()?; return Ok(None); } } @@ -354,17 +364,64 @@ impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { V: de::DeserializeSeed<'de>, { self.de.value = self.de.current_kv.clone().unwrap().1; + self.de.check_cycles()?; seed.deserialize(&mut *self.de) } } +/// A helper struct for deserializing sequences. struct SeqAccess<'a, 'de: 'a> { + /// The deserializer. de: &'a mut Deserializer<'de>, + /// The sequence, represented as a JavaScript array. seq: Array<'de>, + /// The sequence length. length: usize, + /// The current index. index: usize, } +impl<'a, 'de: 'a> SeqAccess<'a, 'de> { + /// Creates a new `SeqAccess` ensuring that the top-level value is added + /// to the `Deserializer` visitor stack. + fn new(de: &'a mut Deserializer<'de>, seq: Array<'de>) -> Result { + de.stack.push(seq.clone().into_value()); + + // Retrieve the `length` property from the object itself rather than + // using the bindings `Array::len` given that according to the spec + // it's fine to return any value, not just a number from the + // `length` property. + let value: Value = seq.as_object().get(PredefinedAtom::Length)?; + let length: usize = if value.is_number() { + value.as_number().unwrap() as usize + } else { + let value_of: Function = value + .as_object() + .expect("length to be an object") + .get(PredefinedAtom::ValueOf)?; + value_of.call(())? + }; + + Ok(Self { + de, + seq, + length, + index: 0, + }) + } + + /// Pops the top level value representing this sequence. + /// Errors if a different value is popped. + fn pop(&mut self) -> anyhow::Result<()> { + let v = self.de.pop_visited()?; + if v != self.seq.clone().into_value() { + bail!("Popped a mismatched value. Expected the top level sequence value"); + } + + Ok(()) + } +} + impl<'a, 'de> de::SeqAccess<'de> for SeqAccess<'a, 'de> { type Error = Error; @@ -385,8 +442,13 @@ impl<'a, 'de> de::SeqAccess<'de> for SeqAccess<'a, 'de> { self.de.value = Null.into_value(self.seq.ctx().clone()) } self.index += 1; + // Check cycles right before starting the deserialization for the + // sequence elements. + self.de.check_cycles()?; seed.deserialize(&mut *self.de).map(Some) } else { + // Pop the sequence when there are no more elements. + self.pop()?; Ok(None) } } diff --git a/crates/javy/tests/misc.rs b/crates/javy/tests/misc.rs index 435a484b..8f4dd5a9 100644 --- a/crates/javy/tests/misc.rs +++ b/crates/javy/tests/misc.rs @@ -19,3 +19,22 @@ fn string_keys_and_ref_counting() -> Result<()> { Ok(()) } + +#[cfg(feature = "json")] +#[test] +fn json_stringify_cycle_checks() -> Result<()> { + let mut config = Config::default(); + config.override_json_parse_and_stringify(true); + + let source = include_bytes!("stringify_cycle.js"); + let rt = Runtime::new(config)?; + + rt.context().with(|this| { + let _: () = this + .eval_with_options(*source, EvalOptions::default()) + .inspect_err(|e| println!("{e}")) + .expect("source evaluation to succeed"); + }); + + Ok(()) +} diff --git a/crates/javy/tests/stringify_cycle.js b/crates/javy/tests/stringify_cycle.js new file mode 100644 index 00000000..0e700268 --- /dev/null +++ b/crates/javy/tests/stringify_cycle.js @@ -0,0 +1,15 @@ +const template = { + foo: "foo", + bar: "bar", +}; +const x = []; + +x.push(template); +x.push(template); +x.push(template); +x.push(template); +x.push(template); +x.push(template); + + +console.log(JSON.stringify(x));