diff --git a/src/rust/jsg-test/tests/mod.rs b/src/rust/jsg-test/tests/mod.rs index 9f88cd69eec..e596e10203e 100644 --- a/src/rust/jsg-test/tests/mod.rs +++ b/src/rust/jsg-test/tests/mod.rs @@ -12,4 +12,5 @@ mod local_cast; mod non_coercible; mod resource_callback; mod resource_conversion; +mod string; mod unwrap; diff --git a/src/rust/jsg-test/tests/string.rs b/src/rust/jsg-test/tests/string.rs new file mode 100644 index 00000000000..ee2c15f8cfb --- /dev/null +++ b/src/rust/jsg-test/tests/string.rs @@ -0,0 +1,613 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +use jsg::v8::Local; +use jsg::v8::MaybeLocal; +use jsg::v8::String as JsString; +use jsg::v8::ToLocalValue; +use jsg::v8::Utf8Value; +use jsg::v8::Value; +use jsg::v8::WriteFlags; + +// Convenience: create a Local from a UTF-8 str literal. +fn from_utf8<'a>(lock: &mut jsg::Lock, s: &str) -> Local<'a, JsString> { + JsString::new_from_utf8(lock, s.as_bytes()).unwrap(lock) +} + +// ============================================================================= +// Local — static constructors +// ============================================================================= + +#[test] +fn string_empty_has_zero_length() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = JsString::empty(lock); + assert_eq!(s.length(), 0); + assert!(s.is_one_byte()); + Ok(()) + }); +} + +#[test] +fn string_new_from_utf8_roundtrips_ascii() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let maybe = JsString::new_from_utf8(lock, b"hello"); + assert!(!maybe.is_empty()); + let s = maybe.unwrap(lock); + assert_eq!(s.length(), 5); + assert_eq!(s.to_string(lock), "hello"); + Ok(()) + }); +} + +#[test] +fn string_new_from_utf8_roundtrips_unicode() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let input = "こんにちは"; + 
let s = from_utf8(lock, input); + assert_eq!(s.to_string(lock), input); + Ok(()) + }); +} + +#[test] +fn string_new_from_utf8_empty_slice() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let maybe = JsString::new_from_utf8(lock, b""); + assert!(!maybe.is_empty()); + let s = maybe.unwrap(lock); + assert_eq!(s.length(), 0); + Ok(()) + }); +} + +#[test] +fn string_new_from_one_byte_roundtrips() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let maybe = JsString::new_from_one_byte(lock, b"latin1"); + assert!(!maybe.is_empty()); + let s = maybe.unwrap(lock); + assert_eq!(s.length(), 6); + assert!(s.is_one_byte()); + assert_eq!(s.to_string(lock), "latin1"); + Ok(()) + }); +} + +#[test] +fn string_new_from_two_byte_roundtrips() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let utf16: Vec = "hello".encode_utf16().collect(); + let maybe = JsString::new_from_two_byte(lock, &utf16); + assert!(!maybe.is_empty()); + let s = maybe.unwrap(lock); + assert_eq!(s.length(), 5); + assert_eq!(s.to_string(lock), "hello"); + Ok(()) + }); +} + +#[test] +fn string_new_from_two_byte_unicode() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let input = "日本語"; + let utf16: Vec = input.encode_utf16().collect(); + let s = JsString::new_from_two_byte(lock, &utf16).unwrap(lock); + assert_eq!(s.to_string(lock), input); + Ok(()) + }); +} + +#[test] +fn string_new_from_utf8_internalized_roundtrips() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s1 = JsString::new_internalized_from_utf8(lock, b"intern_me").unwrap(lock); + let s2 = JsString::new_internalized_from_utf8(lock, b"intern_me").unwrap(lock); + // Both strings should have the same content. + assert_eq!(s1.to_string(lock), "intern_me"); + assert_eq!(s2.to_string(lock), "intern_me"); + // Internalized strings with equal content compare equal. 
+ assert_eq!(s1, s2); + Ok(()) + }); +} + +#[test] +fn string_new_from_one_byte_internalized_roundtrips() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = JsString::new_internalized_from_one_byte(lock, b"latin1_intern").unwrap(lock); + assert_eq!(s.to_string(lock), "latin1_intern"); + assert!(s.is_one_byte()); + Ok(()) + }); +} + +#[test] +fn string_new_from_two_byte_internalized_roundtrips() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let utf16: Vec = "two_byte_intern".encode_utf16().collect(); + let s = JsString::new_internalized_from_two_byte(lock, &utf16).unwrap(lock); + assert_eq!(s.to_string(lock), "two_byte_intern"); + Ok(()) + }); +} + +// ============================================================================= +// Local — instance methods +// ============================================================================= + +#[test] +fn string_length_matches_utf16_code_units() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + // ASCII: 1 byte per char = 1 UTF-16 code unit + assert_eq!(from_utf8(lock, "hello").length(), 5); + // Each BMP kanji = 1 UTF-16 code unit + assert_eq!(from_utf8(lock, "日本語").length(), 3); + Ok(()) + }); +} + +#[test] +fn string_is_one_byte_for_ascii() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "ascii only"); + assert!(s.is_one_byte()); + assert!(s.contains_only_one_byte()); + Ok(()) + }); +} + +#[test] +fn string_is_not_one_byte_for_multibyte() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "こんにちは"); + assert!(!s.is_one_byte()); + assert!(!s.contains_only_one_byte()); + Ok(()) + }); +} + +#[test] +fn string_utf8_length_matches_byte_count() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let ascii = "hello"; + assert_eq!(from_utf8(lock, ascii).utf8_length(lock), 
ascii.len()); + + let unicode = "こんにちは"; + assert_eq!(from_utf8(lock, unicode).utf8_length(lock), unicode.len()); + Ok(()) + }); +} + +// ============================================================================= +// write / write_one_byte / write_utf8 +// ============================================================================= + +#[test] +fn string_write_utf16_into_buffer() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "hi"); + let mut buf = vec![0u16; 2]; + s.write(lock, 0, 2, &mut buf, WriteFlags::None); + assert_eq!(buf, vec!['h' as u16, 'i' as u16]); + Ok(()) + }); +} + +#[test] +fn string_write_utf16_with_offset() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "hello"); + let mut buf = vec![0u16; 3]; + s.write(lock, 2, 3, &mut buf, WriteFlags::None); + assert_eq!(buf, vec!['l' as u16, 'l' as u16, 'o' as u16]); + Ok(()) + }); +} + +#[test] +fn string_write_one_byte_into_buffer() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "abc"); + let mut buf = vec![0u8; 3]; + s.write_one_byte(lock, 0, 3, &mut buf, WriteFlags::None); + assert_eq!(buf, b"abc"); + Ok(()) + }); +} + +#[test] +fn string_write_utf8_into_buffer() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let input = "hello"; + let s = from_utf8(lock, input); + let mut buf = vec![0u8; input.len()]; + let written = s.write_utf8(lock, &mut buf, WriteFlags::None); + assert_eq!(written, input.len()); + assert_eq!(&buf, input.as_bytes()); + Ok(()) + }); +} + +#[test] +fn string_write_utf8_null_terminate() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let input = "hi"; + let s = from_utf8(lock, input); + let mut buf = vec![0u8; input.len() + 1]; + let written = s.write_utf8(lock, &mut buf, WriteFlags::NullTerminate); + assert_eq!(written, input.len() + 1); 
+ assert_eq!(buf[input.len()], 0); + Ok(()) + }); +} + +#[test] +fn string_to_string_roundtrips_ascii() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let input = "Hello, world!"; + assert_eq!(from_utf8(lock, input).to_string(lock), input); + Ok(()) + }); +} + +#[test] +fn string_to_string_roundtrips_unicode() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let input = "🦀 Rust"; + assert_eq!(from_utf8(lock, input).to_string(lock), input); + Ok(()) + }); +} + +// ============================================================================= +// PartialEq / Eq +// ============================================================================= + +#[test] +fn string_eq_same_content() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let a = from_utf8(lock, "equal"); + let b = from_utf8(lock, "equal"); + assert_eq!(a, b); + Ok(()) + }); +} + +#[test] +fn string_ne_different_content() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let a = from_utf8(lock, "foo"); + let b = from_utf8(lock, "bar"); + assert_ne!(a, b); + Ok(()) + }); +} + +#[test] +fn string_empty_eq_empty() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let a = JsString::empty(lock); + let b = JsString::empty(lock); + assert_eq!(a, b); + Ok(()) + }); +} + +// ============================================================================= +// MaybeLocal +// ============================================================================= + +#[test] +fn maybe_local_is_not_empty_on_success() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let maybe = JsString::new_from_utf8(lock, b"hello"); + assert!(!maybe.is_empty()); + Ok(()) + }); +} + +#[test] +fn maybe_local_into_option_some() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let maybe = JsString::new_from_utf8(lock, 
b"test"); + let opt = maybe.into_option(lock); + assert!(opt.is_some()); + Ok(()) + }); +} + +#[test] +fn maybe_local_from_none_is_empty() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let _ = lock; // ensure we're inside the context + let maybe: MaybeLocal<'_, JsString> = None::>.into(); + assert!(maybe.is_empty()); + Ok(()) + }); +} + +#[test] +fn maybe_local_unwrap_or_returns_default_when_empty() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let default = JsString::empty(lock); + let maybe: MaybeLocal<'_, JsString> = None::>.into(); + let result = maybe.unwrap_or(lock, default); + assert_eq!(result.length(), 0); + Ok(()) + }); +} + +// ============================================================================= +// WriteFlags +// ============================================================================= + +#[test] +fn write_flags_none_is_zero() { + assert_eq!(WriteFlags::None.bits(), 0); +} + +#[test] +fn write_flags_null_terminate_is_one() { + assert_eq!(WriteFlags::NullTerminate.bits(), 1); +} + +#[test] +fn write_flags_replace_invalid_utf8_is_two() { + assert_eq!(WriteFlags::ReplaceInvalidUtf8.bits(), 2); +} + +#[test] +fn write_flags_bitor_combines_values() { + let combined = WriteFlags::NullTerminate | WriteFlags::ReplaceInvalidUtf8; + assert_eq!(combined.bits(), 3); + assert_eq!(combined, WriteFlags::NullTerminateAndReplaceInvalidUtf8); +} + +#[test] +fn write_flags_combined_variant_equals_bitor() { + assert_eq!( + WriteFlags::NullTerminateAndReplaceInvalidUtf8.bits(), + WriteFlags::NullTerminate.bits() | WriteFlags::ReplaceInvalidUtf8.bits() + ); +} + +// ============================================================================= +// Utf8Value +// ============================================================================= + +#[test] +fn utf8_value_length_matches_byte_count() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let val: Local<'_, 
Value> = "hello".to_local(lock); + let utf8 = Utf8Value::new(lock, &val); + assert_eq!(utf8.length(), 5); + Ok(()) + }); +} + +#[test] +fn utf8_value_as_str_returns_correct_content() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let val: Local<'_, Value> = "hello world".to_local(lock); + let utf8 = Utf8Value::new(lock, &val); + assert_eq!(utf8.as_str(), Some("hello world")); + Ok(()) + }); +} + +#[test] +fn utf8_value_as_bytes_matches_utf8_encoding() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let input = "Rust 🦀"; + let val: Local<'_, Value> = input.to_local(lock); + let utf8 = Utf8Value::new(lock, &val); + assert_eq!(utf8.as_bytes(), input.as_bytes()); + Ok(()) + }); +} + +#[test] +fn utf8_value_from_unicode_string() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let input = "こんにちは"; + let val: Local<'_, Value> = input.to_local(lock); + let utf8 = Utf8Value::new(lock, &val); + assert_eq!(utf8.length(), input.len()); + assert_eq!(utf8.as_str(), Some(input)); + Ok(()) + }); +} + +#[test] +fn utf8_value_from_empty_string() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let val: Local<'_, Value> = "".to_local(lock); + let utf8 = Utf8Value::new(lock, &val); + assert_eq!(utf8.length(), 0); + assert_eq!(utf8.as_str(), Some("")); + Ok(()) + }); +} + +#[test] +fn utf8_value_as_ptr_is_non_null() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let val: Local<'_, Value> = "test".to_local(lock); + let utf8 = Utf8Value::new(lock, &val); + assert!(!utf8.as_ptr().is_null()); + Ok(()) + }); +} + +#[test] +fn utf8_value_coerces_number_to_string() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, ctx| { + let val = ctx.eval_raw("42").map_err(|e| e.unwrap_jsg_err(lock))?; + let utf8 = Utf8Value::new(lock, &val); + assert_eq!(utf8.as_str(), Some("42")); + Ok(()) + }); +} + +// 
============================================================================= +// Local cast from/to Local +// ============================================================================= + +#[test] +fn string_try_as_from_value() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let val: Local<'_, Value> = "cast me".to_local(lock); + let s = val.try_as::().expect("should cast to String"); + assert_eq!(s.to_string(lock), "cast me"); + Ok(()) + }); +} + +#[test] +fn string_try_as_fails_for_non_string() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let val: Local<'_, Value> = jsg::Number::new(42.0).to_local(lock); + assert!(val.try_as::().is_none()); + Ok(()) + }); +} + +#[test] +fn string_upcast_to_value() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "upcast"); + let val: Local<'_, Value> = s.into(); + assert!(val.is_string()); + Ok(()) + }); +} + +// ============================================================================= +// String::concat, internalize, get_identity_hash, is_flat, K_MAX_LENGTH +// ============================================================================= + +#[test] +fn string_concat_produces_concatenated_string() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let left = from_utf8(lock, "hello, "); + let right = from_utf8(lock, "world"); + let result = JsString::concat(lock, left, right); + assert_eq!(result.to_string(lock), "hello, world"); + Ok(()) + }); +} + +#[test] +fn string_concat_with_empty_string() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "abc"); + let empty = JsString::empty(lock); + let result = JsString::concat(lock, s, empty); + assert_eq!(result.to_string(lock), "abc"); + Ok(()) + }); +} + +#[test] +fn string_internalize_returns_equal_string() { + let harness = crate::Harness::new(); + 
harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "intern-me"); + let interned = s.internalize(lock); + assert_eq!(interned.to_string(lock), "intern-me"); + Ok(()) + }); +} + +#[test] +fn string_internalize_twice_returns_equal_content() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "dedup"); + let a = s.internalize(lock); + let b = s.internalize(lock); + assert_eq!(a.to_string(lock), b.to_string(lock)); + Ok(()) + }); +} + +#[test] +fn string_get_identity_hash_is_nonzero() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "hash-me"); + assert_ne!(s.get_identity_hash(), 0); + Ok(()) + }); +} + +#[test] +fn string_get_identity_hash_is_stable() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "stable-hash"); + assert_eq!(s.get_identity_hash(), s.get_identity_hash()); + Ok(()) + }); +} + +#[test] +fn string_is_flat_for_simple_string() { + let harness = crate::Harness::new(); + harness.run_in_context(|lock, _ctx| { + let s = from_utf8(lock, "flat"); + assert!(s.is_flat()); + Ok(()) + }); +} + +#[test] +fn string_max_length_matches_v8() { + // V8's kMaxLength is (1<<28)-16 on 32-bit and (1<<29)-24 on 64-bit. + #[cfg(target_pointer_width = "32")] + assert_eq!(JsString::MAX_LENGTH, (1 << 28) - 16); + #[cfg(target_pointer_width = "64")] + assert_eq!(JsString::MAX_LENGTH, (1 << 29) - 24); +} diff --git a/src/rust/jsg/ffi-inl.h b/src/rust/jsg/ffi-inl.h index 22847773d80..70eb9af97bd 100644 --- a/src/rust/jsg/ffi-inl.h +++ b/src/rust/jsg/ffi-inl.h @@ -36,6 +36,20 @@ inline const v8::Local& local_as_ref_from_ffi(const Local& value) { return *reinterpret_cast*>(ptr_void); } +// MaybeLocal +// v8::MaybeLocal is exactly one Local field at offset 0, so it has the same +// size and layout as ffi::Local (one pointer-sized word, zero means empty). 
+static_assert(sizeof(v8::MaybeLocal) == sizeof(MaybeLocal), "Size should match"); +static_assert(alignof(v8::MaybeLocal) == alignof(MaybeLocal), "Alignment should match"); + +template +inline MaybeLocal maybe_local_to_ffi(v8::MaybeLocal value) { + size_t result; + auto ptr_void = reinterpret_cast(&result); + new (ptr_void) v8::MaybeLocal(value); + return MaybeLocal{result}; +} + // Global // // ffi::Global stores only the strong v8::Global in `ptr`. diff --git a/src/rust/jsg/ffi.c++ b/src/rust/jsg/ffi.c++ index 01ffd4e696f..40cb4d79605 100644 --- a/src/rust/jsg/ffi.c++ +++ b/src/rust/jsg/ffi.c++ @@ -228,6 +228,121 @@ bool local_is_function(const Local& val) { return ::rust::String(*utf8, utf8.length()); } +// Utf8Value +Utf8Value utf8_value_new(Isolate* isolate, Local value) { + auto* v = new v8::String::Utf8Value(isolate, local_from_ffi(kj::mv(value))); + return Utf8Value{reinterpret_cast(v)}; +} + +void utf8_value_drop(Utf8Value value) { + delete reinterpret_cast(value.ptr); +} + +size_t utf8_value_length(const Utf8Value& value) { + return reinterpret_cast(value.ptr)->length(); +} + +const uint8_t* utf8_value_data(const Utf8Value& value) { + return reinterpret_cast( + **reinterpret_cast(value.ptr)); +} + +// Local +Local local_string_empty(Isolate* isolate) { + return to_ffi(v8::String::Empty(isolate)); +} + +int32_t local_string_length(const Local& value) { + return local_as_ref_from_ffi(value)->Length(); +} + +bool local_string_is_one_byte(const Local& value) { + // Note: IsOneByte() reflects V8's internal string representation, not the logical + // content. A string containing only Latin-1 characters may still return false if V8 + // stores it as two-byte (e.g. after concatenation), i.e. false negatives are possible. + // Use ContainsOnlyOneByte() for a content-based check, keeping in mind that it scans + // the entire string. 
+ return local_as_ref_from_ffi(value)->IsOneByte(); +} + +bool local_string_contains_only_one_byte(const Local& value) { + return local_as_ref_from_ffi(value)->ContainsOnlyOneByte(); +} + +size_t local_string_utf8_length(Isolate* isolate, const Local& value) { + return local_as_ref_from_ffi(value)->Utf8LengthV2(isolate); +} + +void local_string_write_v2(Isolate* isolate, + const Local& value, + uint32_t offset, + uint32_t length, + uint16_t* buffer, + int32_t flags) { + local_as_ref_from_ffi(value)->WriteV2(isolate, offset, length, buffer, flags); +} + +void local_string_write_one_byte_v2(Isolate* isolate, + const Local& value, + uint32_t offset, + uint32_t length, + uint8_t* buffer, + int32_t flags) { + local_as_ref_from_ffi(value)->WriteOneByteV2(isolate, offset, length, buffer, flags); +} + +size_t local_string_write_utf8_v2( + Isolate* isolate, const Local& value, uint8_t* buffer, size_t capacity, int32_t flags) { + return local_as_ref_from_ffi(value)->WriteUtf8V2( + isolate, reinterpret_cast(buffer), capacity, flags); +} + +bool local_string_equals(const Local& value, const Local& other) { + return local_as_ref_from_ffi(value)->StringEquals( + local_as_ref_from_ffi(other)); +} + +bool local_string_is_flat(const Local& value) { + return local_as_ref_from_ffi(value)->IsFlat(); +} + +Local local_string_concat(Isolate* isolate, Local left, Local right) { + return to_ffi(v8::String::Concat(isolate, local_from_ffi(kj::mv(left)), + local_from_ffi(kj::mv(right)))); +} + +Local local_string_internalize(Isolate* isolate, const Local& value) { + return to_ffi(local_as_ref_from_ffi(value)->InternalizeString(isolate)); +} + +int32_t local_string_get_identity_hash(const Local& value) { + return local_as_ref_from_ffi(value)->GetIdentityHash(); +} + +MaybeLocal local_string_new_from_utf8( + Isolate* isolate, const uint8_t* data, int32_t length, bool internalized) { + auto type = internalized ? 
v8::NewStringType::kInternalized : v8::NewStringType::kNormal; + return maybe_local_to_ffi( + v8::String::NewFromUtf8(isolate, reinterpret_cast(data), type, length)); +} + +MaybeLocal local_string_new_from_one_byte( + Isolate* isolate, const uint8_t* data, int32_t length, bool internalized) { + auto type = internalized ? v8::NewStringType::kInternalized : v8::NewStringType::kNormal; + return maybe_local_to_ffi(v8::String::NewFromOneByte(isolate, data, type, length)); +} + +MaybeLocal local_string_new_from_two_byte( + Isolate* isolate, const uint16_t* data, int32_t length, bool internalized) { + auto type = internalized ? v8::NewStringType::kInternalized : v8::NewStringType::kNormal; + return maybe_local_to_ffi(v8::String::NewFromTwoByte(isolate, data, type, length)); +} + +bool maybe_local_is_empty(const MaybeLocal& value) { + auto ptr_void = reinterpret_cast(&value.ptr); + return reinterpret_cast*>(ptr_void)->IsEmpty(); +} + // Local Local local_function_call( Isolate* isolate, const Local& function, const Local& recv, ::rust::Slice args) { diff --git a/src/rust/jsg/ffi.h b/src/rust/jsg/ffi.h index b3c492e2f7d..26ceb568bf4 100644 --- a/src/rust/jsg/ffi.h +++ b/src/rust/jsg/ffi.h @@ -21,6 +21,7 @@ using Isolate = v8::Isolate; using FunctionCallbackInfo = v8::FunctionCallbackInfo; struct ModuleRegistry; struct Local; +struct MaybeLocal; struct Global; struct TracedReference; struct Realm; @@ -143,6 +144,39 @@ bool local_is_array_buffer_view(const Local& val); bool local_is_function(const Local& val); ::rust::String local_type_of(Isolate* isolate, const Local& val); +// Local +Local local_string_empty(Isolate* isolate); +int32_t local_string_length(const Local& value); +bool local_string_is_one_byte(const Local& value); +bool local_string_contains_only_one_byte(const Local& value); +size_t local_string_utf8_length(Isolate* isolate, const Local& value); +void local_string_write_v2(Isolate* isolate, + const Local& value, + uint32_t offset, + uint32_t length, + 
uint16_t* buffer, + int32_t flags); +void local_string_write_one_byte_v2(Isolate* isolate, + const Local& value, + uint32_t offset, + uint32_t length, + uint8_t* buffer, + int32_t flags); +size_t local_string_write_utf8_v2( + Isolate* isolate, const Local& value, uint8_t* buffer, size_t capacity, int32_t flags); +bool local_string_equals(const Local& value, const Local& other); +bool local_string_is_flat(const Local& value); +Local local_string_concat(Isolate* isolate, Local left, Local right); +Local local_string_internalize(Isolate* isolate, const Local& value); +int32_t local_string_get_identity_hash(const Local& value); +MaybeLocal local_string_new_from_utf8( + Isolate* isolate, const uint8_t* data, int32_t length, bool internalized); +MaybeLocal local_string_new_from_one_byte( + Isolate* isolate, const uint8_t* data, int32_t length, bool internalized); +MaybeLocal local_string_new_from_two_byte( + Isolate* isolate, const uint16_t* data, int32_t length, bool internalized); +bool maybe_local_is_empty(const MaybeLocal& value); + // Local Local local_function_call( Isolate* isolate, const Local& function, const Local& recv, ::rust::Slice args); @@ -208,6 +242,21 @@ size_t fci_get_length(FunctionCallbackInfo* args); Local fci_get_arg(FunctionCallbackInfo* args, size_t index); void fci_set_return_value(FunctionCallbackInfo* args, Local value); +// Utf8Value — ptr holds a heap-allocated v8::String::Utf8Value*. +// The complete struct definition is generated by CXX from the shared struct in v8.rs. +struct Utf8Value; + +// utf8_value_new converts `value` to a UTF-8 string, producing a heap-allocated copy +// of the UTF-8 bytes that is independent of the V8 heap. The caller must call +// utf8_value_drop to free it. +// utf8_value_data returns a pointer into this copy (not into V8 memory); it is valid +// for the lifetime of the Utf8Value and must not be used after utf8_value_drop. +// utf8_value_length returns the number of bytes (excluding the null terminator). 
+Utf8Value utf8_value_new(Isolate* isolate, Local value); +void utf8_value_drop(Utf8Value value); +size_t utf8_value_length(const Utf8Value& value); +const uint8_t* utf8_value_data(const Utf8Value& value); + struct ModuleRegistry { virtual ~ModuleRegistry() = default; virtual void addBuiltinModule( diff --git a/src/rust/jsg/v8.rs b/src/rust/jsg/v8.rs index 22c3cb4c241..e9c7b0a1625 100644 --- a/src/rust/jsg/v8.rs +++ b/src/rust/jsg/v8.rs @@ -48,6 +48,14 @@ pub mod ffi { ptr: usize, } + /// Mirrors `v8::MaybeLocal`: a single pointer-sized word where `ptr == 0` means empty. + /// This matches V8's internal layout exactly — `v8::MaybeLocal` holds one `Local` + /// field which is itself one `internal::Address*` (one pointer word). + #[derive(Debug)] + struct MaybeLocal { + ptr: usize, + } + #[derive(Debug)] struct Global { /// Strong `v8::Global` handle. Always valid when non-zero. @@ -66,6 +74,11 @@ pub mod ffi { ptr: usize, } + #[derive(Debug)] + struct Utf8Value { + ptr: usize, + } + #[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum ExceptionType { OperationError, @@ -196,6 +209,64 @@ pub mod ffi { pub unsafe fn local_is_function(value: &Local) -> bool; pub unsafe fn local_type_of(isolate: *mut Isolate, value: &Local) -> String; + // Local + pub unsafe fn local_string_length(value: &Local) -> i32; + pub unsafe fn local_string_is_one_byte(value: &Local) -> bool; + pub unsafe fn local_string_contains_only_one_byte(value: &Local) -> bool; + pub unsafe fn local_string_utf8_length(isolate: *mut Isolate, value: &Local) -> usize; + pub unsafe fn local_string_write_v2( + isolate: *mut Isolate, + value: &Local, + offset: u32, + length: u32, + buffer: *mut u16, + flags: i32, + ); + pub unsafe fn local_string_write_one_byte_v2( + isolate: *mut Isolate, + value: &Local, + offset: u32, + length: u32, + buffer: *mut u8, + flags: i32, + ); + pub unsafe fn local_string_write_utf8_v2( + isolate: *mut Isolate, + value: &Local, + buffer: *mut u8, + capacity: usize, + flags: i32, + 
) -> usize; + pub unsafe fn local_string_empty(isolate: *mut Isolate) -> Local; + pub unsafe fn local_string_equals(lhs: &Local, rhs: &Local) -> bool; + pub unsafe fn local_string_is_flat(value: &Local) -> bool; + pub unsafe fn local_string_concat( + isolate: *mut Isolate, + left: Local, + right: Local, + ) -> Local; + pub unsafe fn local_string_internalize(isolate: *mut Isolate, value: &Local) -> Local; + pub unsafe fn local_string_get_identity_hash(value: &Local) -> i32; + pub unsafe fn local_string_new_from_utf8( + isolate: *mut Isolate, + data: *const u8, + length: i32, + internalized: bool, + ) -> MaybeLocal; + pub unsafe fn local_string_new_from_one_byte( + isolate: *mut Isolate, + data: *const u8, + length: i32, + internalized: bool, + ) -> MaybeLocal; + pub unsafe fn local_string_new_from_two_byte( + isolate: *mut Isolate, + data: *const u16, + length: i32, + internalized: bool, + ) -> MaybeLocal; + pub unsafe fn maybe_local_is_empty(value: &MaybeLocal) -> bool; + // Local pub unsafe fn local_function_call( isolate: *mut Isolate, @@ -431,6 +502,11 @@ pub mod ffi { isolate: *mut Isolate, constructor: &Global, /* v8::Global */ ) -> Local /* v8::Local */; + + pub unsafe fn utf8_value_new(isolate: *mut Isolate, value: Local) -> Utf8Value; + pub unsafe fn utf8_value_drop(value: Utf8Value); + pub unsafe fn utf8_value_length(value: &Utf8Value) -> usize; + pub unsafe fn utf8_value_data(value: &Utf8Value) -> *const u8; } /// Module visibility level, mirroring workerd::jsg::ModuleType from modules.capnp. @@ -482,12 +558,214 @@ impl std::fmt::Display for ffi::ExceptionType { // Marker types for Local #[derive(Debug)] pub struct Value; +/// Marker for `v8::String` handles. +#[derive(Debug)] +pub struct String; + +impl String { + /// Maximum length of a V8 string in UTF-16 code units. + /// + /// Matches `v8::String::kMaxLength`. Attempting to create a string longer than + /// this will cause V8 to return an empty `MaybeLocal`. 
+ pub const MAX_LENGTH: i32 = if cfg!(target_pointer_width = "32") { + (1 << 28) - 16 + } else { + (1 << 29) - 24 + }; + + /// Returns the empty string singleton. + /// + /// Corresponds to `v8::String::Empty()`. + pub fn empty<'a>(lock: &mut crate::Lock) -> Local<'a, Self> { + let isolate = lock.isolate(); + // SAFETY: Lock guarantees the isolate is locked and a HandleScope is active. + unsafe { Local::from_ffi(isolate, ffi::local_string_empty(isolate.as_ffi())) } + } + + /// Creates a new string from a `&str`. + /// + /// Corresponds to `v8::String::NewFromUtf8`. + pub fn new_from_str<'a>(lock: &mut crate::Lock, data: &str) -> MaybeLocal<'a, Self> { + Self::new_from_utf8(lock, data.as_bytes()) + } + + /// Creates an internalized string from a `&str`. + /// + /// Equal strings will be pointer-equal after internalization, which speeds up + /// property-key lookups at the cost of a hash-table probe on creation. + /// + /// Corresponds to `v8::String::NewFromUtf8` with `kInternalized`. + pub fn new_internalized_from_str<'a>( + lock: &mut crate::Lock, + data: &str, + ) -> MaybeLocal<'a, Self> { + Self::new_internalized_from_utf8(lock, data.as_bytes()) + } + + /// Creates a new string from a UTF-8 string literal. + /// + /// Panics at runtime if `literal.len()` exceeds [`Self::MAX_LENGTH`], matching + /// the compile-time `static_assert` that `v8::String::NewFromUtf8Literal` performs. + /// + /// # Panics + /// + /// Panics if `literal.len()` exceeds [`Self::MAX_LENGTH`]. + /// + /// Corresponds to `v8::String::NewFromUtf8Literal`. + pub fn new_from_utf8_literal<'a>( + lock: &mut crate::Lock, + literal: &'static str, + ) -> MaybeLocal<'a, Self> { + assert!( + literal.len() <= Self::MAX_LENGTH as usize, + "string literal exceeds v8::String::kMaxLength" + ); + Self::new_from_utf8(lock, literal.as_bytes()) + } + + /// Creates a new string from UTF-8 data. + /// + /// Returns an empty `MaybeLocal` if V8 cannot allocate the string. 
+ /// + /// Strings longer than `i32::MAX` bytes are silently truncated to `i32::MAX` bytes + /// to satisfy V8's `int`-typed length parameter; in practice V8 will reject strings + /// that large anyway due to heap limits. + /// + /// Corresponds to `v8::String::NewFromUtf8`. + pub fn new_from_utf8<'a>(lock: &mut crate::Lock, data: &[u8]) -> MaybeLocal<'a, Self> { + let isolate = lock.isolate(); + let len = i32::try_from(data.len()).unwrap_or(i32::MAX); + // SAFETY: Lock guarantees the isolate is locked and a HandleScope is active; + // data.as_ptr() and len describe a valid byte slice. + let handle = + unsafe { ffi::local_string_new_from_utf8(isolate.as_ffi(), data.as_ptr(), len, false) }; + // SAFETY: handle is a valid MaybeLocal from V8; ptr==0 means empty. + unsafe { MaybeLocal::from_ffi(handle) } + } + + /// Creates an internalized string from UTF-8 data. + /// + /// Equal strings will be pointer-equal after internalization, which speeds up + /// property-key lookups at the cost of a hash-table probe on creation. + /// + /// Strings longer than `i32::MAX` bytes are silently truncated to `i32::MAX` bytes. + /// + /// Corresponds to `v8::String::NewFromUtf8` with `kInternalized`. + pub fn new_internalized_from_utf8<'a>( + lock: &mut crate::Lock, + data: &[u8], + ) -> MaybeLocal<'a, Self> { + let isolate = lock.isolate(); + let len = i32::try_from(data.len()).unwrap_or(i32::MAX); + // SAFETY: Lock guarantees the isolate is locked and a HandleScope is active; + // data.as_ptr() and len describe a valid byte slice. + let handle = + unsafe { ffi::local_string_new_from_utf8(isolate.as_ffi(), data.as_ptr(), len, true) }; + // SAFETY: handle is a valid MaybeLocal from V8; ptr==0 means empty. + unsafe { MaybeLocal::from_ffi(handle) } + } + + /// Creates a new string from Latin-1 (one-byte) data. + /// + /// Each byte is mapped to the Unicode code point with the same value. + /// Returns an empty `MaybeLocal` if V8 cannot allocate the string. 
+ /// + /// Strings longer than `i32::MAX` bytes are silently truncated to `i32::MAX` bytes. + /// + /// Corresponds to `v8::String::NewFromOneByte`. + pub fn new_from_one_byte<'a>(lock: &mut crate::Lock, data: &[u8]) -> MaybeLocal<'a, Self> { + let isolate = lock.isolate(); + let len = i32::try_from(data.len()).unwrap_or(i32::MAX); + // SAFETY: Lock guarantees the isolate is locked; data.as_ptr() and len are valid. + let handle = unsafe { + ffi::local_string_new_from_one_byte(isolate.as_ffi(), data.as_ptr(), len, false) + }; + // SAFETY: handle is a valid MaybeLocal from V8; ptr==0 means empty. + unsafe { MaybeLocal::from_ffi(handle) } + } + + /// Creates an internalized string from Latin-1 (one-byte) data. + /// + /// Equal strings will be pointer-equal after internalization. + /// Strings longer than `i32::MAX` bytes are silently truncated to `i32::MAX` bytes. + /// + /// Corresponds to `v8::String::NewFromOneByte` with `kInternalized`. + pub fn new_internalized_from_one_byte<'a>( + lock: &mut crate::Lock, + data: &[u8], + ) -> MaybeLocal<'a, Self> { + let isolate = lock.isolate(); + let len = i32::try_from(data.len()).unwrap_or(i32::MAX); + // SAFETY: Lock guarantees the isolate is locked; data.as_ptr() and len are valid. + let handle = unsafe { + ffi::local_string_new_from_one_byte(isolate.as_ffi(), data.as_ptr(), len, true) + }; + // SAFETY: handle is a valid MaybeLocal from V8; ptr==0 means empty. + unsafe { MaybeLocal::from_ffi(handle) } + } + + /// Creates a new string from UTF-16 data. + /// + /// Returns an empty `MaybeLocal` if V8 cannot allocate the string. + /// + /// Strings longer than `i32::MAX` code units are silently truncated to `i32::MAX` code units. + /// + /// Corresponds to `v8::String::NewFromTwoByte`. 
+ pub fn new_from_two_byte<'a>(lock: &mut crate::Lock, data: &[u16]) -> MaybeLocal<'a, Self> { + let isolate = lock.isolate(); + let len = i32::try_from(data.len()).unwrap_or(i32::MAX); + // SAFETY: Lock guarantees the isolate is locked; data.as_ptr() and len are valid. + let handle = unsafe { + ffi::local_string_new_from_two_byte(isolate.as_ffi(), data.as_ptr(), len, false) + }; + // SAFETY: handle is a valid MaybeLocal from V8; ptr==0 means empty. + unsafe { MaybeLocal::from_ffi(handle) } + } + + /// Creates an internalized string from UTF-16 data. + /// + /// Equal strings will be pointer-equal after internalization. + /// Strings longer than `i32::MAX` code units are silently truncated to `i32::MAX` code units. + /// + /// Corresponds to `v8::String::NewFromTwoByte` with `kInternalized`. + pub fn new_internalized_from_two_byte<'a>( + lock: &mut crate::Lock, + data: &[u16], + ) -> MaybeLocal<'a, Self> { + let isolate = lock.isolate(); + let len = i32::try_from(data.len()).unwrap_or(i32::MAX); + // SAFETY: Lock guarantees the isolate is locked; data.as_ptr() and len are valid. + let handle = unsafe { + ffi::local_string_new_from_two_byte(isolate.as_ffi(), data.as_ptr(), len, true) + }; + // SAFETY: handle is a valid MaybeLocal from V8; ptr==0 means empty. + unsafe { MaybeLocal::from_ffi(handle) } + } + + /// Concatenates two strings. + /// + /// Corresponds to `v8::String::Concat()`. + pub fn concat<'a>( + lock: &mut crate::Lock, + left: Local<'a, Self>, + right: Local<'a, Self>, + ) -> Local<'a, Self> { + let isolate = lock.isolate(); + // SAFETY: Lock guarantees the isolate is locked and a HandleScope is active. + unsafe { + Local::from_ffi( + isolate, + ffi::local_string_concat(isolate.as_ffi(), left.into_ffi(), right.into_ffi()), + ) + } + } +} impl Display for Local<'_, Value> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { // SAFETY: isolate is valid and locked (guaranteed by the Local's invariant). 
let mut lock = unsafe { Lock::from_isolate_ptr(self.isolate.as_ffi()) }; - match String::from_js(&mut lock, self.clone()) { + match ::from_js(&mut lock, self.clone()) { Ok(value) => write!(f, "{value}"), Err(e) => write!(f, "{e:?}"), } @@ -729,7 +1007,7 @@ impl<'a, T> Local<'a, T> { /// "bigint", "string", "symbol", "function", or "object". /// /// Note: For `null`, this returns "object" (JavaScript's historical behavior). - pub fn type_of(&self) -> String { + pub fn type_of(&self) -> std::string::String { // SAFETY: handle is valid within the current HandleScope. unsafe { ffi::local_type_of(self.isolate.as_ffi(), &self.handle) } } @@ -777,6 +1055,7 @@ macro_rules! impl_as { impl_as!(Function, is_function); impl_as!(Object, is_object); impl_as!(Array, is_array); +impl_as!(String, is_string); // Value-specific implementations impl<'a> Local<'a, Value> { @@ -803,6 +1082,15 @@ impl PartialEq for Local<'_, Value> { } } +impl PartialEq for Local<'_, String> { + fn eq(&self, other: &Self) -> bool { + // SAFETY: Both handles are valid V8 String handles. + unsafe { ffi::local_string_equals(&self.handle, &other.handle) } + } +} + +impl Eq for Local<'_, String> {} + impl Local<'_, Function> { /// Calls this function and converts the result via [`FromJS`]. /// @@ -875,6 +1163,7 @@ macro_rules! 
impl_local_cast { } // Upcasts to Value +impl_local_cast!(String -> Value, is_string); impl_local_cast!(Object -> Value, is_object); impl_local_cast!(Function -> Value, is_function); impl_local_cast!(Array -> Value, is_array); @@ -1179,6 +1468,488 @@ impl_typed_array!(Float64Array, f64, local_float64_array_get); impl_typed_array!(BigInt64Array, i64, local_bigint64_array_get); impl_typed_array!(BigUint64Array, u64, local_biguint64_array_get); +// ============================================================================= +// `String`-specific implementations +// ============================================================================= + +/// Write flags matching `v8::String::WriteFlags`. +/// +/// These correspond directly to `kNone`, `kNullTerminate`, and `kReplaceInvalidUtf8`. +/// Flags can be combined with `|` via the `BitOr` impl. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +#[repr(i32)] +pub enum WriteFlags { + /// No special write flags. + #[default] + None = 0, + /// Include a null terminator in the output. The buffer must have space for it. + NullTerminate = 1, + /// Replace invalid UTF-8/UTF-16 sequences with the Unicode replacement character U+FFFD. + /// Set this to guarantee valid UTF-8 output from `write_utf8`. + ReplaceInvalidUtf8 = 2, + /// Combines `NullTerminate` and `ReplaceInvalidUtf8`. + /// + /// Equivalent to `NullTerminate | ReplaceInvalidUtf8`. This variant exists so that + /// the enum covers every value in `0..=3`, making the `BitOr` transmute sound. + NullTerminateAndReplaceInvalidUtf8 = 3, +} + +impl WriteFlags { + /// Returns the underlying `i32` bitmask value. + #[inline] + pub fn bits(self) -> i32 { + self as i32 + } +} + +impl std::ops::BitOr for WriteFlags { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self { + // SAFETY: WriteFlags variants cover all values 0..=3; OR-ing any two + // produces a value in that range, all of which have valid discriminants. 
+ unsafe { std::mem::transmute(self.bits() | rhs.bits()) } + } +} + +/// Rust equivalent of `v8::MaybeLocal` — either a `Local` or empty. +/// +/// Mirrors V8's layout exactly: one pointer-sized word where `ptr == 0` is empty. +/// No boxing, no `Option` overhead, no stored isolate — this is a direct ABI-compatible +/// view of the `ffi::MaybeLocal` returned across the FFI boundary. +/// +/// Methods that produce a `Local` require a `&mut Lock` so they can recover the +/// isolate pointer on demand, matching the pattern used by `Local` constructors. +pub struct MaybeLocal<'a, T> { + /// The raw FFI handle. `ptr == 0` means empty (matches `v8::MaybeLocal` default). + handle: ffi::MaybeLocal, + _marker: PhantomData<(&'a (), T)>, +} + +impl<'a, T> MaybeLocal<'a, T> { + /// Wraps a raw `ffi::MaybeLocal` returned from the FFI layer. + /// + /// # Safety + /// If `handle.ptr != 0`, the handle must point to a live V8 value within the + /// current `HandleScope` of the active isolate. + pub unsafe fn from_ffi(handle: ffi::MaybeLocal) -> Self { + Self { + handle, + _marker: PhantomData, + } + } + + /// Returns `true` if this `MaybeLocal` is empty. + pub fn is_empty(&self) -> bool { + // SAFETY: handle is a valid ffi::MaybeLocal; no isolate or HandleScope needed. + unsafe { ffi::maybe_local_is_empty(&self.handle) } + } + + /// Returns the contained value as a `Local` without consuming `self`, or `None` if empty. + /// + /// Copies the underlying V8 handle pointer so that both `self` and the returned `Local` + /// refer to the same V8 value. V8 `Local` handles are non-owning references into the + /// `HandleScope` stack; `local_clone` is a cheap pointer copy (not a deep clone), and + /// `local_drop` is a no-op for locals. Use [`into_option`](Self::into_option) to transfer + /// ownership without the copy when `self` is no longer needed. 
+ pub fn to_local(&self, lock: &mut crate::Lock) -> Option> { + // SAFETY: handle is a valid ffi::MaybeLocal; no isolate or HandleScope needed. + if unsafe { ffi::maybe_local_is_empty(&self.handle) } { + return None; + } + // local_clone is a bitwise pointer copy — both the MaybeLocal and the returned Local + // refer to the same HandleScope entry. This is safe because Local handles are + // non-owning and local_drop is a no-op; the HandleScope itself manages the lifetime. + // SAFETY: handle is non-empty and points to a live V8 value in the current HandleScope. + let cloned = unsafe { + ffi::local_clone(&ffi::Local { + ptr: self.handle.ptr, + }) + }; + // SAFETY: handle is non-empty, isolate is valid, and cloned is a valid Local. + Some(unsafe { Local::from_ffi(lock.isolate(), cloned) }) + } + + /// Converts into `Option>`, consuming `self`. + /// + /// Transfers ownership of the underlying handle without cloning, which is more efficient + /// than [`to_local`](Self::to_local) when `self` is no longer needed after the call. + pub fn into_option(self, lock: &mut crate::Lock) -> Option> { + if self.handle.ptr == 0 { + return None; + } + // Transfer ownership: zero out the ptr so Drop becomes a no-op, then wrap in Local. + let ptr = self.handle.ptr; + // SAFETY: We are taking ownership of the handle; zeroing the ptr prevents double-free. + let mut this = std::mem::ManuallyDrop::new(self); + this.handle.ptr = 0; + // SAFETY: ptr is non-zero (checked above), isolate is valid, handle ownership transferred. + Some(unsafe { Local::from_ffi(lock.isolate(), ffi::Local { ptr }) }) + } + + /// Unwraps the value, panicking if empty. + /// + /// # Panics + /// + /// Panics if the `MaybeLocal` is empty. + pub fn unwrap(self, lock: &mut crate::Lock) -> Local<'a, T> { + self.into_option(lock).expect("MaybeLocal is empty") + } + + /// Returns the contained `Local`, or `default` if empty. 
pub fn unwrap_or(self, lock: &mut crate::Lock, default: Local<'a, T>) -> Local<'a, T> {
    // `default` is already constructed by the caller, so the eager form is fine.
    self.into_option(lock).unwrap_or(default)
}
}

impl<T> Drop for MaybeLocal<'_, T> {
    /// Releases the wrapped handle through `ffi::local_drop` unless it is empty.
    fn drop(&mut self) {
        if self.handle.ptr == 0 {
            return;
        }
        // Swap in an empty handle first so `self` never keeps a released pointer.
        let released = std::mem::replace(&mut self.handle, ffi::MaybeLocal { ptr: 0 });
        // SAFETY: handle is a valid non-empty V8 handle being released.
        unsafe { ffi::local_drop(ffi::Local { ptr: released.ptr }) };
    }
}

impl<'a, T> From<Option<Local<'a, T>>> for MaybeLocal<'a, T> {
    /// Constructs a `MaybeLocal` from an `Option<Local<'a, T>>`.
    ///
    /// `None` produces an empty handle (`ptr == 0`); `Some(local)` reuses its pointer.
    fn from(opt: Option<Local<'a, T>>) -> Self {
        let ptr = opt.map_or(0, |local| {
            // Take the pointer and forget the Local so its destructor does not run;
            // the new MaybeLocal now carries the handle.
            let raw = local.handle.ptr;
            std::mem::forget(local);
            raw
        });
        Self {
            handle: ffi::MaybeLocal { ptr },
            _marker: PhantomData,
        }
    }
}

impl Local<'_, String> {
    // Instance methods — correspond to `v8::String` member functions

    /// Returns the number of characters (UTF-16 code units) in the string.
    ///
    /// The return type is `i32` to match the V8 API (`v8::String::Length()` returns
    /// `int`). V8 enforces a maximum string length well below `i32::MAX`, so the
    /// result is always non-negative.
    ///
    /// Corresponds to `v8::String::Length()`.
    #[inline]
    pub fn length(&self) -> i32 {
        // SAFETY: self.handle is a valid V8 String handle.
        unsafe { ffi::local_string_length(&self.handle) }
    }

    /// Returns `true` if the string is represented internally as one-byte (Latin-1).
    ///
    /// Corresponds to `v8::String::IsOneByte()`.
    #[inline]
    pub fn is_one_byte(&self) -> bool {
        // SAFETY: self.handle is a valid V8 String handle.
        unsafe { ffi::local_string_is_one_byte(&self.handle) }
    }

    /// Returns `true` if all characters in the string fit in one byte (Latin-1).
+ /// + /// Unlike `is_one_byte()`, this scans the entire string and may be slow for + /// two-byte strings that happen to contain only Latin-1 characters. + /// + /// Corresponds to `v8::String::ContainsOnlyOneByte()`. + #[inline] + pub fn contains_only_one_byte(&self) -> bool { + // SAFETY: self.handle is a valid V8 String handle. + unsafe { ffi::local_string_contains_only_one_byte(&self.handle) } + } + + /// Returns the number of bytes required to encode the string as UTF-8. + /// + /// Does not include a null terminator. + /// + /// Corresponds to `v8::String::Utf8LengthV2()`. + #[inline] + pub fn utf8_length(&self, lock: &mut crate::Lock) -> usize { + // SAFETY: Lock guarantees the isolate is locked; self.handle is a valid String handle. + unsafe { ffi::local_string_utf8_length(lock.isolate().as_ffi(), &self.handle) } + } + + /// Writes the string as UTF-16 code units into `buffer`. + /// + /// `offset` is the index of the first character to write; `length` is the + /// maximum number of characters to write. `flags` is a combination of + /// [`WriteFlags`] variants. + /// + /// # Panics + /// + /// Panics if `buffer.len() < length`. + /// + /// Corresponds to `v8::String::WriteV2()`. + pub fn write( + &self, + lock: &mut crate::Lock, + offset: u32, + length: u32, + buffer: &mut [u16], + flags: WriteFlags, + ) { + assert!( + buffer.len() >= length as usize, + "buffer too small for requested length" + ); + // SAFETY: Lock guarantees the isolate is locked; buffer is valid and large enough. + unsafe { + ffi::local_string_write_v2( + lock.isolate().as_ffi(), + &self.handle, + offset, + length, + buffer.as_mut_ptr(), + flags.bits(), + ); + } + } + + /// Writes the string as Latin-1 bytes into `buffer`. + /// + /// Only meaningful when `is_one_byte()` returns `true`; characters outside + /// the Latin-1 range are truncated. + /// + /// # Panics + /// + /// Panics if `buffer.len() < length`. + /// + /// Corresponds to `v8::String::WriteOneByteV2()`. 
+ pub fn write_one_byte( + &self, + lock: &mut crate::Lock, + offset: u32, + length: u32, + buffer: &mut [u8], + flags: WriteFlags, + ) { + assert!( + buffer.len() >= length as usize, + "buffer too small for requested length" + ); + // SAFETY: Lock guarantees the isolate is locked; buffer is valid and large enough. + unsafe { + ffi::local_string_write_one_byte_v2( + lock.isolate().as_ffi(), + &self.handle, + offset, + length, + buffer.as_mut_ptr(), + flags.bits(), + ); + } + } + + /// Writes the string as UTF-8 into `buffer`. + /// + /// Returns the number of bytes written. `flags` is a combination of + /// [`WriteFlags`] constants. + /// + /// Corresponds to `v8::String::WriteUtf8V2()`. + pub fn write_utf8( + &self, + lock: &mut crate::Lock, + buffer: &mut [u8], + flags: WriteFlags, + ) -> usize { + // SAFETY: Lock guarantees the isolate is locked; buffer is valid for `capacity` bytes. + unsafe { + ffi::local_string_write_utf8_v2( + lock.isolate().as_ffi(), + &self.handle, + buffer.as_mut_ptr(), + buffer.len(), + flags.bits(), + ) + } + } + + // ------------------------------------------------------------------------- + // Convenience helpers + // ------------------------------------------------------------------------- + + /// Decodes the string to an owned Rust `String` via UTF-8. + /// + /// Allocates a buffer using `utf8_length`, writes into it, and converts. + pub fn to_string(&self, lock: &mut crate::Lock) -> std::string::String { + let byte_len = self.utf8_length(lock); + let mut buf = vec![0u8; byte_len]; + let written = self.write_utf8(lock, &mut buf, WriteFlags::None); + buf.truncate(written); + // V8 guarantees valid UTF-8 output when REPLACE_INVALID_UTF8 is not set and + // the string was originally created from valid data. Use from_utf8 to avoid a + // redundant allocation in the common (valid UTF-8) case; fall back to + // from_utf8_lossy only when the bytes are not valid UTF-8 (e.g. two-byte strings + // with unpaired surrogates). 
+ std::string::String::from_utf8(buf) + .unwrap_or_else(|e| std::string::String::from_utf8_lossy(e.as_bytes()).into_owned()) + } + + // ------------------------------------------------------------------------- + // Additional string operations + // ------------------------------------------------------------------------- + + /// Returns an internalized version of this string. + /// + /// If an equal internalized string already exists in V8's string table it is + /// returned; otherwise a new internalized copy is created. The result is + /// pointer-equal to any other internalized string with the same content. + /// + /// Corresponds to `v8::String::InternalizeString()`. + #[must_use] + pub fn internalize(&self, lock: &mut crate::Lock) -> Self { + let isolate = lock.isolate(); + // SAFETY: Lock guarantees the isolate is locked; self.handle is a valid String handle. + unsafe { + Local::from_ffi( + isolate, + ffi::local_string_internalize(isolate.as_ffi(), &self.handle), + ) + } + } + + /// Returns the identity hash for this string. + /// + /// The hash is stable for the lifetime of the string and is never `0`, + /// but is not guaranteed to be unique across different strings. + /// + /// Corresponds to `v8::Name::GetIdentityHash()`. + #[inline] + pub fn get_identity_hash(&self) -> i32 { + // SAFETY: self.handle is a valid V8 String handle. + unsafe { ffi::local_string_get_identity_hash(&self.handle) } + } + + /// Returns `true` if the string has a flat (contiguous) internal representation. + /// + /// A flat string stores its characters in a single contiguous buffer, which + /// is required by some V8 APIs. Newly created strings are usually flat; cons + /// strings produced by concatenation may not be. + /// + /// Note: This method is available via a Cloudflare-specific V8 patch + /// (`0029-Add-v8-String-IsFlat-API.patch`). + /// + /// Corresponds to `v8::String::IsFlat()`. 
+ #[inline] + pub fn is_flat(&self) -> bool { + // SAFETY: self.handle is a valid V8 String handle. + unsafe { ffi::local_string_is_flat(&self.handle) } + } +} + +// ============================================================================= +// `Utf8Value` +// ============================================================================= + +/// Rust equivalent of `v8::String::Utf8Value`. +/// +/// Converts any V8 value to its UTF-8 string representation (analogous to calling +/// `.toString()` in JavaScript) and holds the result for the duration of its lifetime. +/// +/// The UTF-8 bytes are a **heap-allocated copy** independent of the V8 heap — the data +/// remains valid and stable for the full lifetime of this `Utf8Value` regardless of GC +/// activity. +/// +/// If the value cannot be converted to a string (e.g. a `Symbol`), V8 stores a null +/// pointer internally. In that case [`as_ptr`](Self::as_ptr) returns null, +/// [`length`](Self::length) returns `0`, and [`as_bytes`](Self::as_bytes) / +/// [`as_str`](Self::as_str) return empty slices. +/// +/// # Example +/// +/// ```ignore +/// let utf8 = Utf8Value::new(lock, &value); +/// println!("{}", utf8.as_str().unwrap_or("")); +/// ``` +pub struct Utf8Value { + inner: ffi::Utf8Value, +} + +impl Utf8Value { + /// Constructs a `Utf8Value` by converting `value` to its UTF-8 string representation. + /// + /// Produces a heap-allocated copy of the UTF-8 bytes that is independent of the V8 + /// heap. If `value` cannot be converted to a string (e.g. a `Symbol`), the internal + /// data pointer will be null and [`length`](Self::length) will return `0`. + /// + /// Corresponds to `v8::String::Utf8Value(isolate, obj)`. + pub fn new(lock: &mut Lock, value: &Local<'_, Value>) -> Self { + // SAFETY: Lock guarantees the isolate is locked and a HandleScope is active. + // local_clone produces a cheap handle copy matching V8's by-value constructor semantics. 
+ let inner = unsafe { + ffi::utf8_value_new(lock.isolate().as_ffi(), ffi::local_clone(value.as_ffi())) + }; + Self { inner } + } + + /// Returns the number of UTF-8 bytes in the string, excluding the null terminator. + /// + /// Returns `0` if V8 could not convert the value to a string. + /// + /// Corresponds to `v8::String::Utf8Value::length()`. + #[inline] + pub fn length(&self) -> usize { + // SAFETY: self.inner is a valid Utf8Value. + unsafe { ffi::utf8_value_length(&self.inner) } + } + + /// Returns a raw pointer to the null-terminated UTF-8 bytes stored in this copy. + /// + /// The pointer points into a heap-allocated buffer owned by this `Utf8Value`, not into + /// V8 memory. It is valid for the lifetime of this `Utf8Value`. + /// + /// Returns null if V8 could not convert the value to a string (e.g. a `Symbol`). + /// + /// Corresponds to `v8::String::Utf8Value::operator*()`. + #[inline] + pub fn as_ptr(&self) -> *const u8 { + // SAFETY: self.inner is a valid Utf8Value. + unsafe { ffi::utf8_value_data(&self.inner) } + } + + /// Returns the UTF-8 content as a byte slice. + /// + /// Returns an empty slice if V8 could not convert the value to a string (e.g. a `Symbol`), + /// in which case `operator*()` returns a null pointer. + #[inline] + pub fn as_bytes(&self) -> &[u8] { + let ptr = self.as_ptr(); + if ptr.is_null() { + return &[]; + } + // SAFETY: ptr is non-null and points to length() valid bytes for the lifetime of self. + unsafe { std::slice::from_raw_parts(ptr, self.length()) } + } + + /// Returns the UTF-8 content as a `&str`, or `None` if the bytes are not valid UTF-8. + #[inline] + pub fn as_str(&self) -> Option<&str> { + std::str::from_utf8(self.as_bytes()).ok() + } +} + +impl Drop for Utf8Value { + fn drop(&mut self) { + let inner = ffi::Utf8Value { + ptr: self.inner.ptr, + }; + // SAFETY: self.inner is a valid Utf8Value being released. 
+ unsafe { ffi::utf8_value_drop(inner) }; + } +} + // Object-specific implementations impl<'a> Local<'a, Object> { pub fn set(&mut self, lock: &mut Lock, key: &str, value: Local<'a, Value>) { @@ -1382,7 +2153,7 @@ macro_rules! impl_to_local_value_integer { impl_to_local_value_integer!(u8, u16, u32, i8, i16, i32); -impl ToLocalValue for String { +impl ToLocalValue for std::string::String { fn to_local<'a>(&self, lock: &mut Lock) -> Local<'a, Value> { self.as_str().to_local(lock) } diff --git a/src/rust/jsg/wrappable.rs b/src/rust/jsg/wrappable.rs index 77f8ee13d9d..771cd2c5e62 100644 --- a/src/rust/jsg/wrappable.rs +++ b/src/rust/jsg/wrappable.rs @@ -156,7 +156,7 @@ macro_rules! impl_primitive { }; } -impl_primitive!(String, "string", is_string, unwrap_string); +impl_primitive!(std::string::String, "string", is_string, unwrap_string); impl_primitive!(bool, "boolean", is_boolean, unwrap_boolean); // Number implementation for JavaScript numbers