From 4c8f16c3dc320c867547d37364ce194c5e5afdcc Mon Sep 17 00:00:00 2001 From: Patrick LaFontaine <32135464+Pat-Lafon@users.noreply.github.com> Date: Tue, 30 May 2023 13:45:40 -0400 Subject: [PATCH 1/8] Implement char extension in Rust tools --- bril-rs/Cargo.toml | 6 +- bril-rs/bril2json/Cargo.toml | 3 +- bril-rs/bril2json/src/bril_grammar.lalrpop | 6 +- bril-rs/bril2json/src/lib.rs | 14 ++++ bril-rs/src/conversion.rs | 16 +++++ bril-rs/src/program.rs | 80 +++++++++++++++++++++ brilirs/Cargo.toml | 5 +- brilirs/Makefile | 1 + brilirs/src/check.rs | 49 +++++++++++++ brilirs/src/error.rs | 4 ++ brilirs/src/interp.rs | 81 +++++++++++++++++++++- docs/tools/brilirs.md | 3 +- docs/tools/rust.md | 3 +- 13 files changed, 260 insertions(+), 11 deletions(-) diff --git a/bril-rs/Cargo.toml b/bril-rs/Cargo.toml index bcc11148d..e9a0d5948 100644 --- a/bril-rs/Cargo.toml +++ b/bril-rs/Cargo.toml @@ -17,6 +17,7 @@ keywords = ["compiler", "bril", "parser", "data-structures", "language"] thiserror = "1.0" serde_json = "1.0" serde = { version = "1.0", features = ["derive"] } +encode_unicode = { version = "1.0.0", optional = true} [features] float = [] @@ -25,6 +26,7 @@ ssa = [] speculate = [] position = [] import = [] +char = ["dep:encode_unicode"] [[example]] name = "bril2txt" @@ -33,10 +35,10 @@ path = "examples/bril2txt.rs" # However this currently does not work as expected and is being hashed out in https://github.com/rust-lang/rfcs/pull/3020 and https://github.com/rust-lang/rfcs/pull/2887 # Until a solution is reached, I'm using `required-features` so that these features must be passed by flag. This is less ergonomic at the moment, however the user will get a nicer error that they need a feature flag instead of an Result::unwrap() error. # Note: See dev-dependencies for a hack to not need the user to pass that feature flag. -required-features = ["memory", "float", "ssa", "speculate", "position", "import"] +required-features = ["memory", "float", "ssa", "speculate", "position", "import", "char"] [dev-dependencies] # trick to enable all features in test # This is actually really hacky because it is used in all tests/examples/benchmarks but since we currently only have one example this works for enabling the following feature flags for our users. # If the above rfcs every get resolved, then dev-dependencies will no longer be needed. -bril-rs = { path = ".", features = ["memory", "float", "ssa", "speculate", "position", "import"] } \ No newline at end of file +bril-rs = { path = ".", features = ["memory", "float", "ssa", "speculate", "position", "import", "char"] } diff --git a/bril-rs/bril2json/Cargo.toml b/bril-rs/bril2json/Cargo.toml index 02620bd65..9387bd5d7 100644 --- a/bril-rs/bril2json/Cargo.toml +++ b/bril-rs/bril2json/Cargo.toml @@ -15,6 +15,7 @@ keywords = ["compiler", "bril", "parser", "data-structures", "language"] [dependencies] clap = { version = "4.3", features = ["derive"] } +encode_unicode = "1.0.0" lalrpop-util = { version = "0.20", features = ["lexer"] } regex = "1.8" @@ -25,4 +26,4 @@ lalrpop = "0.20" [dependencies.bril-rs] version = "0.1.0" path = "../../bril-rs" -features = ["ssa", "memory", "float", "speculate", "position", "import"] +features = ["ssa", "memory", "float", "speculate", "position", "import", "char"] diff --git a/bril-rs/bril2json/src/bril_grammar.lalrpop b/bril-rs/bril2json/src/bril_grammar.lalrpop index 124440954..bcfd0b43d 100644 --- a/bril-rs/bril2json/src/bril_grammar.lalrpop +++ b/bril-rs/bril2json/src/bril_grammar.lalrpop @@ -20,7 +20,7 @@ use std::str::FromStr; use std::path::PathBuf; -use crate::{Lines, ParsingArgs}; +use crate::{Lines, ParsingArgs, escape_control_chars}; use bril_rs::{AbstractProgram, AbstractFunction, AbstractArgument, AbstractCode, AbstractInstruction, ConstOps, AbstractType, Literal, Import, ImportedFunction}; grammar(lines : &Lines); @@ -33,6 +33,7 @@ match { r"(\+|-)?[0-9]+" => INT_TOKEN, // int r"(\+|-)?(((([0-9]+\.?[0-9]*)|(\.[0-9]+))(E|e)(\+|-)?[0-9]+)|(([0-9]+\.[0-9]*)|(\.[0-9]+)))" => FLOAT_TOKEN, // https://stackoverflow.com/questions/12643009/regular-expression-for-floating-point-numbers r"(_|%|[[:alpha:]])(_|%|\.|[[:alnum:]])*" => IDENT_TOKEN, + r"('.')|('\\[0abtnvfr]')" => CHAR_TOKEN, r#""[^"]*""# => STRING_TOKEN, _ } @@ -189,6 +190,7 @@ Literal: Literal = { => Literal::Int(n), => Literal::Bool(b), => Literal::Float(f), + => Literal::Char(c), } Num: i64 = => i64::from_str(s).unwrap(); @@ -199,6 +201,8 @@ Bool: bool = { Float: f64 = => f64::from_str(f).unwrap(); +Char: u16 = => {let c = c.trim_matches('\''); escape_control_chars(&c).unwrap_or_else(|| encode_unicode::Utf16Char::from_str_start(c).unwrap().0.to_tuple().0)}; + // https://lalrpop.github.io/lalrpop/tutorial/006_macros.html Comma: Vec = { // (1) ",")*> => match e { // (2) diff --git a/bril-rs/bril2json/src/lib.rs b/bril-rs/bril2json/src/lib.rs index b5134ca20..29439927d 100644 --- a/bril-rs/bril2json/src/lib.rs +++ b/bril-rs/bril2json/src/lib.rs @@ -12,6 +12,20 @@ use std::fs::File; use bril_rs::{AbstractProgram, ColRow, Position}; +fn escape_control_chars(s: &str) -> Option { + match s { + "\\0" => Some(0), + "\\a" => Some(7), + "\\b" => Some(8), + "\\t" => Some(9), + "\\n" => Some(10), + "\\v" => Some(11), + "\\f" => Some(12), + "\\r" => Some(13), + _ => None, + } +} + #[doc(hidden)] #[derive(Clone)] pub struct Lines { diff --git a/bril-rs/src/conversion.rs b/bril-rs/src/conversion.rs index 232b2faaa..3db1aa79a 100644 --- a/bril-rs/src/conversion.rs +++ b/bril-rs/src/conversion.rs @@ -245,6 +245,20 @@ impl TryFrom for Instruction { "fle" => ValueOps::Fle, #[cfg(feature = "float")] "fge" => ValueOps::Fge, + #[cfg(feature = "char")] + "ceq" => ValueOps::Ceq, + #[cfg(feature = "char")] + "clt" => ValueOps::Clt, + #[cfg(feature = "char")] + "cgt" => ValueOps::Cgt, + #[cfg(feature = "char")] + "cle" => ValueOps::Cle, + #[cfg(feature = "char")] + "cge" => ValueOps::Cge, + #[cfg(feature = "char")] + "char2int" => ValueOps::Char2int, + #[cfg(feature = "char")] + "int2char" => ValueOps::Int2char, #[cfg(feature = "memory")] "alloc" => ValueOps::Alloc, #[cfg(feature = "memory")] @@ -313,6 +327,8 @@ impl TryFrom for Type { AbstractType::Primitive(t) if t == "bool" => Self::Bool, #[cfg(feature = "float")] AbstractType::Primitive(t) if t == "float" => Self::Float, + #[cfg(feature = "char")] + AbstractType::Primitive(t) if t == "char" => Self::Char, AbstractType::Primitive(t) => return Err(ConversionError::InvalidPrimitive(t)), #[cfg(feature = "memory")] AbstractType::Parameterized(t, ty) if t == "ptr" => { diff --git a/bril-rs/src/program.rs b/bril-rs/src/program.rs index 4b29c88a4..e188f7ee1 100644 --- a/bril-rs/src/program.rs +++ b/bril-rs/src/program.rs @@ -451,6 +451,27 @@ pub enum ValueOps { /// #[cfg(feature = "float")] Fge, + /// + #[cfg(feature = "char")] + Ceq, + /// + #[cfg(feature = "char")] + Clt, + /// + #[cfg(feature = "char")] + Cgt, + /// + #[cfg(feature = "char")] + Cle, + /// + #[cfg(feature = "char")] + Cge, + /// + #[cfg(feature = "char")] + Char2int, + /// + #[cfg(feature = "char")] + Int2char, /// #[cfg(feature = "memory")] Alloc, @@ -499,6 +520,20 @@ impl Display for ValueOps { Self::Fle => write!(f, "fle"), #[cfg(feature = "float")] Self::Fge => write!(f, "fge"), + #[cfg(feature = "char")] + Self::Ceq => write!(f, "ceq"), + #[cfg(feature = "char")] + Self::Clt => write!(f, "clt"), + #[cfg(feature = "char")] + Self::Cgt => write!(f, "cgt"), + #[cfg(feature = "char")] + Self::Cle => write!(f, "cle"), + #[cfg(feature = "char")] + Self::Cge => write!(f, "cge"), + #[cfg(feature = "char")] + Self::Char2int => write!(f, "char2int"), + #[cfg(feature = "char")] + Self::Int2char => write!(f, "int2char"), #[cfg(feature = "memory")] Self::Alloc => write!(f, "alloc"), #[cfg(feature = "memory")] @@ -520,6 +555,9 @@ pub enum Type { /// #[cfg(feature = "float")] Float, + /// + #[cfg(feature = "char")] + Char, /// #[cfg(feature = "memory")] #[serde(rename = "ptr")] @@ -533,6 +571,8 @@ impl Display for Type { Self::Bool => write!(f, "bool"), #[cfg(feature = "float")] Self::Float => write!(f, "float"), + #[cfg(feature = "char")] + Self::Char => write!(f, "char"), #[cfg(feature = "memory")] Self::Pointer(tpe) => write!(f, "ptr<{tpe}>"), } @@ -551,6 +591,38 @@ pub enum Literal { /// Floating Points #[cfg(feature = "float")] Float(f64), + /// UTF-16 Characters + #[cfg(feature = "char")] + #[serde(deserialize_with = "deserialize_bmp")] + #[serde(serialize_with = "serialize_bmp")] + Char(u16), +} + +#[cfg(feature = "char")] +fn deserialize_bmp<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let s = String::deserialize(deserializer)?; + + if s.len() != 1 { + return Err(serde::de::Error::custom("invalid UTF-16 character")); + } + + let c = encode_unicode::Utf16Char::from_str_start(&s) + .map_err(|_| serde::de::Error::custom("invalid UTF-16 character")) + .map(|c| c.0.to_tuple().0)?; + Ok(c) +} + +#[cfg(feature = "char")] +#[allow(clippy::trivially_copy_pass_by_ref)] // to match serde's signature +fn serialize_bmp(c: &u16, serializer: S) -> Result +where + S: serde::Serializer, +{ + let c = encode_unicode::Utf16Char::from_bmp(*c).unwrap(); + serializer.serialize_str(&c.to_string()) } impl Display for Literal { @@ -560,6 +632,12 @@ impl Display for Literal { Self::Bool(b) => write!(f, "{b}"), #[cfg(feature = "float")] Self::Float(x) => write!(f, "{x}"), + #[cfg(feature = "char")] + Self::Char(c) => write!( + f, + "\'{}\'", + encode_unicode::Utf16Char::from_bmp(*c).unwrap() + ), } } } @@ -573,6 +651,8 @@ impl Literal { Self::Bool(_) => Type::Bool, #[cfg(feature = "float")] Self::Float(_) => Type::Float, + #[cfg(feature = "char")] + Self::Char(_) => Type::Char, } } } diff --git a/brilirs/Cargo.toml b/brilirs/Cargo.toml index 096bb9069..5665b1433 100644 --- a/brilirs/Cargo.toml +++ b/brilirs/Cargo.toml @@ -22,11 +22,12 @@ clap = { version = "4.2", features = ["derive"] } fxhash = "0.2" mimalloc = "0.1" itoa = "1.0" +encode_unicode = "1.0.0" [dependencies.bril-rs] version = "0.1.0" path = "../bril-rs" -features = ["ssa", "memory", "float", "speculate"] +features = ["ssa", "memory", "float", "speculate", "char"] [dependencies.bril2json] version = "0.1.0" @@ -39,4 +40,4 @@ lto = true panic = "abort" [features] -completions = ["clap_complete"] \ No newline at end of file +completions = ["clap_complete"] diff --git a/brilirs/Makefile b/brilirs/Makefile index 76007ccf9..f142c5f13 100644 --- a/brilirs/Makefile +++ b/brilirs/Makefile @@ -2,6 +2,7 @@ TESTS := ../test/check/*.bril \ ../test/interp*/core*/*.bril \ ../test/interp*/float/*.bril \ ../test/interp*/mem*/*.bril \ +../test/interp*/char*/*.bril \ ../test/interp*/mixed/*.bril \ ../test/interp*/ssa*/*.bril \ diff --git a/brilirs/src/check.rs b/brilirs/src/check.rs index d6fea25da..f14a56e12 100644 --- a/brilirs/src/check.rs +++ b/brilirs/src/check.rs @@ -210,6 +210,55 @@ fn type_check_instruction<'a>( check_asmt_type(&Type::Bool, op_type)?; update_env(env, dest, op_type) } + Instruction::Value { + op: ValueOps::Ceq | ValueOps::Cge | ValueOps::Clt | ValueOps::Cgt | ValueOps::Cle, + args, + dest, + funcs, + labels, + pos: _, + op_type, + } => { + check_num_args(2, args)?; + check_num_funcs(0, funcs)?; + check_num_labels(0, labels)?; + check_asmt_type(&Type::Char, get_type(env, 0, args)?)?; + check_asmt_type(&Type::Char, get_type(env, 1, args)?)?; + check_asmt_type(&Type::Bool, op_type)?; + update_env(env, dest, op_type) + } + Instruction::Value { + op: ValueOps::Char2int, + args, + dest, + funcs, + labels, + pos: _, + op_type, + } => { + check_num_args(1, args)?; + check_num_funcs(0, funcs)?; + check_num_labels(0, labels)?; + check_asmt_type(&Type::Char, get_type(env, 0, args)?)?; + check_asmt_type(&Type::Int, op_type)?; + update_env(env, dest, op_type) + } + Instruction::Value { + op: ValueOps::Int2char, + args, + dest, + funcs, + labels, + pos: _, + op_type, + } => { + check_num_args(1, args)?; + check_num_funcs(0, funcs)?; + check_num_labels(0, labels)?; + check_asmt_type(&Type::Int, get_type(env, 0, args)?)?; + check_asmt_type(&Type::Char, op_type)?; + update_env(env, dest, op_type) + } Instruction::Value { op: ValueOps::Call, dest, diff --git a/brilirs/src/error.rs b/brilirs/src/error.rs index 7ff7a5a0a..7aaa3d038 100644 --- a/brilirs/src/error.rs +++ b/brilirs/src/error.rs @@ -21,6 +21,8 @@ pub enum InterpError { NoMainFunction, #[error("phi node has unequal numbers of labels and args")] UnequalPhiNode, + #[error("char must have one character")] + NotOneChar, #[error("multiple functions of the same name found")] DuplicateFunction, #[error("Expected empty return for `{0}`, found value")] @@ -53,6 +55,8 @@ pub enum InterpError { BadAsmtType(bril_rs::Type, bril_rs::Type), // (expected, actual). For when the LHS type of an instruction is bad #[error("There has been an io error: `{0:?}`")] IoError(#[from] std::io::Error), + #[error("value ${0} cannot be converted to char")] + ToCharError(i64), #[error("You probably shouldn't see this error, this is here to handle conversions between InterpError and PositionalError")] PositionalInterpErrorConversion(#[from] PositionalInterpError), } diff --git a/brilirs/src/interp.rs b/brilirs/src/interp.rs index b14fd2ef9..9275c53b2 100644 --- a/brilirs/src/interp.rs +++ b/brilirs/src/interp.rs @@ -2,6 +2,7 @@ use crate::basic_block::{BBFunction, BBProgram, BasicBlock}; use crate::error::{InterpError, PositionalInterpError}; use bril_rs::Instruction; +use encode_unicode::Utf16Char; use fxhash::FxHashMap; use mimalloc::MiMalloc; @@ -169,6 +170,7 @@ enum Value { Int(i64), Bool(bool), Float(f64), + Char(u16), Pointer(Pointer), #[default] Uninitialized, @@ -197,6 +199,7 @@ impl fmt::Display for Value { Self::Float(v) if v.is_infinite() && v.is_sign_positive() => write!(f, "Infinity"), Self::Float(v) if v.is_infinite() && v.is_sign_negative() => write!(f, "-Infinity"), Self::Float(v) => write!(f, "{v:.17}"), + Self::Char(c) => write!(f, "{}", Utf16Char::from_bmp(*c).unwrap()), Self::Pointer(p) => write!(f, "{p:?}"), Self::Uninitialized => unreachable!(), } @@ -211,6 +214,7 @@ fn optimized_val_output(out: &mut T, val: &Value) -> Result<( Value::Float(f) if f.is_infinite() && f.is_sign_negative() => out.write_all(b"-Infinity"), Value::Float(f) if f.is_nan() => out.write_all(b"NaN"), Value::Float(f) => out.write_all(format!("{f:.17}").as_bytes()), + Value::Char(c) => out.write_all(Utf16Char::from_bmp(*c).unwrap().to_string().as_bytes()), Value::Pointer(p) => out.write_all(format!("{p:?}").as_bytes()), Value::Uninitialized => unreachable!(), } @@ -222,6 +226,7 @@ impl From<&bril_rs::Literal> for Value { bril_rs::Literal::Int(i) => Self::Int(*i), bril_rs::Literal::Bool(b) => Self::Bool(*b), bril_rs::Literal::Float(f) => Self::Float(*f), + bril_rs::Literal::Char(c) => Self::Char(*c), } } } @@ -232,6 +237,7 @@ impl From for Value { bril_rs::Literal::Int(i) => Self::Int(i), bril_rs::Literal::Bool(b) => Self::Bool(b), bril_rs::Literal::Float(f) => Self::Float(f), + bril_rs::Literal::Char(c) => Self::Char(c), } } } @@ -266,6 +272,16 @@ impl From<&Value> for f64 { } } +impl From<&Value> for u16 { + fn from(value: &Value) -> Self { + if let Value::Char(c) = value { + *c + } else { + unreachable!() + } + } +} + impl<'a> From<&'a Value> for &'a Pointer { fn from(value: &'a Value) -> Self { if let Value::Pointer(p) = value { @@ -305,8 +321,8 @@ fn execute_value_op( last_label: Option<&String>, ) -> Result<(), InterpError> { use bril_rs::ValueOps::{ - Add, Alloc, And, Call, Div, Eq, Fadd, Fdiv, Feq, Fge, Fgt, Fle, Flt, Fmul, Fsub, Ge, Gt, Id, - Le, Load, Lt, Mul, Not, Or, Phi, PtrAdd, Sub, + Add, Alloc, And, Call, Ceq, Cge, Cgt, Char2int, Cle, Clt, Div, Eq, Fadd, Fdiv, Feq, Fge, Fgt, + Fle, Flt, Fmul, Fsub, Ge, Gt, Id, Int2char, Le, Load, Lt, Mul, Not, Or, Phi, PtrAdd, Sub, }; match op { Add => { @@ -420,6 +436,44 @@ fn execute_value_op( let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 >= arg1)); } + Ceq => { + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); + state.env.set(dest, Value::Bool(arg0 == arg1)); + } + Clt => { + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); + state.env.set(dest, Value::Bool(arg0 < arg1)); + } + Cgt => { + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); + state.env.set(dest, Value::Bool(arg0 > arg1)); + } + Cle => { + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); + state.env.set(dest, Value::Bool(arg0 <= arg1)); + } + Cge => { + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); + state.env.set(dest, Value::Bool(arg0 >= arg1)); + } + Char2int => { + let arg0 = get_arg::(&state.env, 0, args); + state.env.set(dest, Value::Int(i64::from(arg0))); + } + Int2char => { + let arg0 = get_arg::(&state.env, 0, args); + + let arg0_u16 = u16::try_from(arg0).map_err(|_| InterpError::ToCharError(arg0))?; + + let _arg0_char = Utf16Char::from_bmp(arg0_u16).map_err(|_| InterpError::ToCharError(arg0))?; + + state.env.set(dest, Value::Char(arg0_u16)); + } Call => { let callee_func = state.prog.get(funcs[0]).unwrap(); @@ -574,7 +628,7 @@ fn execute<'a, T: std::io::Write>( bril_rs::Literal::Float(f) => { state.env.set(numified_code.dest.unwrap(), Value::Float(*f)); } - bril_rs::Literal::Bool(_) => unreachable!(), + bril_rs::Literal::Char(_) | bril_rs::Literal::Bool(_) => unreachable!(), } } else { state @@ -634,6 +688,20 @@ fn execute<'a, T: std::io::Write>( } } +fn escape_control_chars(s: &str) -> Result { + match s { + "\\0" => Ok(0), + "\\a" => Ok(7), + "\\b" => Ok(8), + "\\t" => Ok(9), + "\\n" => Ok(10), + "\\v" => Ok(11), + "\\f" => Ok(12), + "\\r" => Ok(13), + _ => Utf16Char::from_str_start(s).map(|c| *c.0.to_array().get(0).unwrap()), + } +} + fn parse_args( mut env: Environment, args: &[bril_rs::Argument], @@ -687,6 +755,13 @@ fn parse_args( Ok(()) } bril_rs::Type::Pointer(..) => unreachable!(), + bril_rs::Type::Char => match escape_control_chars(inputs.get(index).unwrap().as_ref()) { + Ok(c) => { + env.set(*arg_as_num, Value::Char(c)); + Ok(()) + } + Err(_) => Err(InterpError::NotOneChar), + }, })?; Ok(env) } diff --git a/docs/tools/brilirs.md b/docs/tools/brilirs.md index 187ff1e92..fd238b2ca 100644 --- a/docs/tools/brilirs.md +++ b/docs/tools/brilirs.md @@ -3,7 +3,7 @@ Fast Interpreter in Rust The `brilirs` directory contains a fast Bril interpreter written in [Rust][]. It is a drop-in replacement for the [reference interpreter](interp.md) that prioritizes speed over completeness and hackability. -It implements [core Bril](../lang/core.md) along with the [SSA][], [memory][], and [floating point][float] extensions. +It implements [core Bril](../lang/core.md) along with the [SSA][], [memory][], [char][], and [floating point][float] extensions. Read [more about the implementation][blog], which is originally by Wil Thomason and Daniel Glus. @@ -36,4 +36,5 @@ To see all of the supported flags, run: [ssa]: ../lang/ssa.md [memory]: ../lang/memory.md [float]: ../lang/float.md +[char]: ../lang/char.md [blog]: https://www.cs.cornell.edu/courses/cs6120/2019fa/blog/faster-interpreter/ diff --git a/docs/tools/rust.md b/docs/tools/rust.md index 2611c23c3..c299113b5 100644 --- a/docs/tools/rust.md +++ b/docs/tools/rust.md @@ -1,7 +1,7 @@ Rust Library ============ -This is a no-frills interface between Bril's JSON and your [Rust][] code. It supports the [Bril core][core] along with the [SSA][], [memory][], [floating point][float], [speculative execution][spec], and [source positions][pos] extensions. +This is a no-frills interface between Bril's JSON and your [Rust][] code. It supports the [Bril core][core] along with the [SSA][], [memory][], [floating point][float], [speculative execution][spec], [char][], and [source positions][pos] extensions. Use --- @@ -55,4 +55,5 @@ make features [float]: ../lang/float.md [spec]: ../lang/spec.md [pos]: ../lang/syntax.md +[char]: ../lang/char.md [import]: ../lang/import.md From e0895fbeebdbffdf5e1682e50d1bd36c5f70fa73 Mon Sep 17 00:00:00 2001 From: Patrick LaFontaine <32135464+Pat-Lafon@users.noreply.github.com> Date: Tue, 30 May 2023 15:04:25 -0400 Subject: [PATCH 2/8] clippy and brilift test fixes --- bril-rs/bril2json/src/bril_grammar.lalrpop | 2 +- brilift/Makefile | 2 +- brilirs/src/interp.rs | 17 +++++++++-------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/bril-rs/bril2json/src/bril_grammar.lalrpop b/bril-rs/bril2json/src/bril_grammar.lalrpop index bcfd0b43d..8c41e2514 100644 --- a/bril-rs/bril2json/src/bril_grammar.lalrpop +++ b/bril-rs/bril2json/src/bril_grammar.lalrpop @@ -201,7 +201,7 @@ Bool: bool = { Float: f64 = => f64::from_str(f).unwrap(); -Char: u16 = => {let c = c.trim_matches('\''); escape_control_chars(&c).unwrap_or_else(|| encode_unicode::Utf16Char::from_str_start(c).unwrap().0.to_tuple().0)}; +Char: u16 = => {let c = c.trim_matches('\''); escape_control_chars(c).unwrap_or_else(|| encode_unicode::Utf16Char::from_str_start(c).unwrap().0.to_tuple().0)}; // https://lalrpop.github.io/lalrpop/tutorial/006_macros.html Comma: Vec = { // (1) diff --git a/brilift/Makefile b/brilift/Makefile index 44f1f4400..3d2e63d96 100644 --- a/brilift/Makefile +++ b/brilift/Makefile @@ -9,7 +9,7 @@ endif # Brilift only supports core Bril for now, so we select those tests & # benchmarks. -TESTS := ../test/interp/core/*.bril ../test/interp/float/*.bril ../test/interp/mem/*.bril ../test/interp/mixed/*.bril +TESTS := ../test/interp/core/*.bril ../test/interp/float/*.bril ../test/interp/mem/*.bril ../test/interp/mixed/store-float.bril BENCHMARKS := ../benchmarks/core/*.bril ../benchmarks/float/*.bril ../benchmarks/mem/*.bril ../benchmarks/mixed/*.bril CFLAGS := $(if $(TARGET),-target $(TARGET)) diff --git a/brilirs/src/interp.rs b/brilirs/src/interp.rs index 9275c53b2..8429aa01a 100644 --- a/brilirs/src/interp.rs +++ b/brilirs/src/interp.rs @@ -698,7 +698,7 @@ fn escape_control_chars(s: &str) -> Result Ok(11), "\\f" => Ok(12), "\\r" => Ok(13), - _ => Utf16Char::from_str_start(s).map(|c| *c.0.to_array().get(0).unwrap()), + _ => Utf16Char::from_str_start(s).map(|c| *c.0.to_array().first().unwrap()), } } @@ -755,13 +755,14 @@ fn parse_args( Ok(()) } bril_rs::Type::Pointer(..) => unreachable!(), - bril_rs::Type::Char => match escape_control_chars(inputs.get(index).unwrap().as_ref()) { - Ok(c) => { - env.set(*arg_as_num, Value::Char(c)); - Ok(()) - } - Err(_) => Err(InterpError::NotOneChar), - }, + bril_rs::Type::Char => escape_control_chars(inputs.get(index).unwrap().as_ref()) + .map_or_else( + |_| Err(InterpError::NotOneChar), + |c| { + env.set(*arg_as_num, Value::Char(c)); + Ok(()) + }, + ), })?; Ok(env) } From 7407d9d7c741a208e9cb24b2d2c2a56a227230a5 Mon Sep 17 00:00:00 2001 From: Patrick LaFontaine <32135464+Pat-Lafon@users.noreply.github.com> Date: Wed, 31 May 2023 12:56:23 -0400 Subject: [PATCH 3/8] Move to char internal representation --- bril-rs/Cargo.toml | 3 +- bril-rs/bril2json/Cargo.toml | 1 - bril-rs/bril2json/src/bril_grammar.lalrpop | 2 +- bril-rs/bril2json/src/lib.rs | 21 ++++--- bril-rs/src/program.rs | 52 ++++++------------ brilirs/Cargo.toml | 7 +-- brilirs/src/interp.rs | 64 +++++++++------------- 7 files changed, 61 insertions(+), 89 deletions(-) diff --git a/bril-rs/Cargo.toml b/bril-rs/Cargo.toml index e9a0d5948..950244a4a 100644 --- a/bril-rs/Cargo.toml +++ b/bril-rs/Cargo.toml @@ -17,7 +17,6 @@ keywords = ["compiler", "bril", "parser", "data-structures", "language"] thiserror = "1.0" serde_json = "1.0" serde = { version = "1.0", features = ["derive"] } -encode_unicode = { version = "1.0.0", optional = true} [features] float = [] @@ -26,7 +25,7 @@ ssa = [] speculate = [] position = [] import = [] -char = ["dep:encode_unicode"] +char = [] [[example]] name = "bril2txt" diff --git a/bril-rs/bril2json/Cargo.toml b/bril-rs/bril2json/Cargo.toml index 9387bd5d7..b9212d8a8 100644 --- a/bril-rs/bril2json/Cargo.toml +++ b/bril-rs/bril2json/Cargo.toml @@ -15,7 +15,6 @@ keywords = ["compiler", "bril", "parser", "data-structures", "language"] [dependencies] clap = { version = "4.3", features = ["derive"] } -encode_unicode = "1.0.0" lalrpop-util = { version = "0.20", features = ["lexer"] } regex = "1.8" diff --git a/bril-rs/bril2json/src/bril_grammar.lalrpop b/bril-rs/bril2json/src/bril_grammar.lalrpop index 8c41e2514..742ce25df 100644 --- a/bril-rs/bril2json/src/bril_grammar.lalrpop +++ b/bril-rs/bril2json/src/bril_grammar.lalrpop @@ -201,7 +201,7 @@ Bool: bool = { Float: f64 = => f64::from_str(f).unwrap(); -Char: u16 = => {let c = c.trim_matches('\''); escape_control_chars(c).unwrap_or_else(|| encode_unicode::Utf16Char::from_str_start(c).unwrap().0.to_tuple().0)}; +Char: char = => {let c = c.trim_matches('\''); escape_control_chars(c).unwrap()}; // https://lalrpop.github.io/lalrpop/tutorial/006_macros.html Comma: Vec = { // (1) diff --git a/bril-rs/bril2json/src/lib.rs b/bril-rs/bril2json/src/lib.rs index 29439927d..d921841d6 100644 --- a/bril-rs/bril2json/src/lib.rs +++ b/bril-rs/bril2json/src/lib.rs @@ -12,16 +12,19 @@ use std::fs::File; use bril_rs::{AbstractProgram, ColRow, Position}; -fn escape_control_chars(s: &str) -> Option { +/// A helper function for processing the accepted Bril characters from their text representation +#[must_use] +pub fn escape_control_chars(s: &str) -> Option { match s { - "\\0" => Some(0), - "\\a" => Some(7), - "\\b" => Some(8), - "\\t" => Some(9), - "\\n" => Some(10), - "\\v" => Some(11), - "\\f" => Some(12), - "\\r" => Some(13), + "\\0" => Some('\u{0000}'), + "\\a" => Some('\u{0007}'), + "\\b" => Some('\u{0008}'), + "\\t" => Some('\u{0009}'), + "\\n" => Some('\u{000A}'), + "\\v" => Some('\u{000B}'), + "\\f" => Some('\u{000C}'), + "\\r" => Some('\u{000D}'), + s if s.len() == 1 => s.chars().next(), _ => None, } } diff --git a/bril-rs/src/program.rs b/bril-rs/src/program.rs index e188f7ee1..ef9acb9ab 100644 --- a/bril-rs/src/program.rs +++ b/bril-rs/src/program.rs @@ -593,36 +593,7 @@ pub enum Literal { Float(f64), /// UTF-16 Characters #[cfg(feature = "char")] - #[serde(deserialize_with = "deserialize_bmp")] - #[serde(serialize_with = "serialize_bmp")] - Char(u16), -} - -#[cfg(feature = "char")] -fn deserialize_bmp<'de, D>(deserializer: D) -> Result -where - D: serde::Deserializer<'de>, -{ - let s = String::deserialize(deserializer)?; - - if s.len() != 1 { - return Err(serde::de::Error::custom("invalid UTF-16 character")); - } - - let c = encode_unicode::Utf16Char::from_str_start(&s) - .map_err(|_| serde::de::Error::custom("invalid UTF-16 character")) - .map(|c| c.0.to_tuple().0)?; - Ok(c) -} - -#[cfg(feature = "char")] -#[allow(clippy::trivially_copy_pass_by_ref)] // to match serde's signature -fn serialize_bmp(c: &u16, serializer: S) -> Result -where - S: serde::Serializer, -{ - let c = encode_unicode::Utf16Char::from_bmp(*c).unwrap(); - serializer.serialize_str(&c.to_string()) + Char(char), } impl Display for Literal { @@ -633,15 +604,26 @@ impl Display for Literal { #[cfg(feature = "float")] Self::Float(x) => write!(f, "{x}"), #[cfg(feature = "char")] - Self::Char(c) => write!( - f, - "\'{}\'", - encode_unicode::Utf16Char::from_bmp(*c).unwrap() - ), + Self::Char(c) => write!(f, "\'{}\'", escape_char(*c)), } } } +#[cfg(feature = "char")] +fn escape_char(c: char) -> String { + match c { + '\u{0000}' => "\\0".to_string(), + '\u{0007}' => "\\a".to_string(), + '\u{0008}' => "\\b".to_string(), + '\u{0009}' => "\\t".to_string(), + '\u{000A}' => "\\n".to_string(), + '\u{000B}' => "\\v".to_string(), + '\u{000C}' => "\\f".to_string(), + '\u{000D}' => "\\r".to_string(), + c => c.to_string(), + } +} + impl Literal { /// A helper function to get the type of literal values #[must_use] diff --git a/brilirs/Cargo.toml b/brilirs/Cargo.toml index 5665b1433..f16e3341a 100644 --- a/brilirs/Cargo.toml +++ b/brilirs/Cargo.toml @@ -13,16 +13,15 @@ keywords = ["compiler", "bril", "interpreter", "data-structures", "language"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [build-dependencies] -clap = { version = "4.2", features = ["derive"] } -clap_complete= { version = "4.2", optional = true } +clap = { version = "4.3", features = ["derive"] } +clap_complete= { version = "4.3", optional = true } [dependencies] thiserror = "1.0" -clap = { version = "4.2", features = ["derive"] } +clap = { version = "4.3", features = ["derive"] } fxhash = "0.2" mimalloc = "0.1" itoa = "1.0" -encode_unicode = "1.0.0" [dependencies.bril-rs] version = "0.1.0" diff --git a/brilirs/src/interp.rs b/brilirs/src/interp.rs index 8429aa01a..17c5a5596 100644 --- a/brilirs/src/interp.rs +++ b/brilirs/src/interp.rs @@ -1,8 +1,8 @@ use crate::basic_block::{BBFunction, BBProgram, BasicBlock}; use crate::error::{InterpError, PositionalInterpError}; +use bril2json::escape_control_chars; use bril_rs::Instruction; -use encode_unicode::Utf16Char; use fxhash::FxHashMap; use mimalloc::MiMalloc; @@ -170,7 +170,7 @@ enum Value { Int(i64), Bool(bool), Float(f64), - Char(u16), + Char(char), Pointer(Pointer), #[default] Uninitialized, @@ -199,7 +199,7 @@ impl fmt::Display for Value { Self::Float(v) if v.is_infinite() && v.is_sign_positive() => write!(f, "Infinity"), Self::Float(v) if v.is_infinite() && v.is_sign_negative() => write!(f, "-Infinity"), Self::Float(v) => write!(f, "{v:.17}"), - Self::Char(c) => write!(f, "{}", Utf16Char::from_bmp(*c).unwrap()), + Self::Char(c) => write!(f, "{c}"), Self::Pointer(p) => write!(f, "{p:?}"), Self::Uninitialized => unreachable!(), } @@ -209,12 +209,15 @@ impl fmt::Display for Value { fn optimized_val_output(out: &mut T, val: &Value) -> Result<(), std::io::Error> { match val { Value::Int(i) => out.write_all(itoa::Buffer::new().format(*i).as_bytes()), - Value::Bool(b) => out.write_all(b.to_string().as_bytes()), + Value::Bool(b) => out.write_all(if *b { b"true" } else { b"false" }), Value::Float(f) if f.is_infinite() && f.is_sign_positive() => out.write_all(b"Infinity"), Value::Float(f) if f.is_infinite() && f.is_sign_negative() => out.write_all(b"-Infinity"), Value::Float(f) if f.is_nan() => out.write_all(b"NaN"), Value::Float(f) => out.write_all(format!("{f:.17}").as_bytes()), - Value::Char(c) => out.write_all(Utf16Char::from_bmp(*c).unwrap().to_string().as_bytes()), + Value::Char(c) => { + let buf = &mut [0_u8; 2]; + out.write_all(c.encode_utf8(buf).as_bytes()) + } Value::Pointer(p) => out.write_all(format!("{p:?}").as_bytes()), Value::Uninitialized => unreachable!(), } @@ -272,7 +275,7 @@ impl From<&Value> for f64 { } } -impl From<&Value> for u16 { +impl From<&Value> for char { fn from(value: &Value) -> Self { if let Value::Char(c) = value { *c @@ -437,42 +440,43 @@ fn execute_value_op( state.env.set(dest, Value::Bool(arg0 >= arg1)); } Ceq => { - let arg0 = get_arg::(&state.env, 0, args); - let arg1 = get_arg::(&state.env, 1, args); + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 == arg1)); } Clt => { - let arg0 = get_arg::(&state.env, 0, args); - let arg1 = get_arg::(&state.env, 1, args); + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 < arg1)); } Cgt => { - let arg0 = get_arg::(&state.env, 0, args); - let arg1 = get_arg::(&state.env, 1, args); + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 > arg1)); } Cle => { - let arg0 = get_arg::(&state.env, 0, args); - let arg1 = get_arg::(&state.env, 1, args); + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 <= arg1)); } Cge => { - let arg0 = get_arg::(&state.env, 0, args); - let arg1 = get_arg::(&state.env, 1, args); + let arg0 = get_arg::(&state.env, 0, args); + let arg1 = get_arg::(&state.env, 1, args); state.env.set(dest, Value::Bool(arg0 >= arg1)); } Char2int => { - let arg0 = get_arg::(&state.env, 0, args); - state.env.set(dest, Value::Int(i64::from(arg0))); + let arg0 = get_arg::(&state.env, 0, args); + state.env.set(dest, Value::Int(u32::from(arg0).into())); } Int2char => { let arg0 = get_arg::(&state.env, 0, args); - let arg0_u16 = u16::try_from(arg0).map_err(|_| InterpError::ToCharError(arg0))?; - - let _arg0_char = Utf16Char::from_bmp(arg0_u16).map_err(|_| InterpError::ToCharError(arg0))?; + let arg0_char = u32::try_from(arg0) + .ok() + .and_then(char::from_u32) + .ok_or(InterpError::ToCharError(arg0))?; - state.env.set(dest, Value::Char(arg0_u16)); + state.env.set(dest, Value::Char(arg0_char)); } Call => { let callee_func = state.prog.get(funcs[0]).unwrap(); @@ -688,20 +692,6 @@ fn execute<'a, T: std::io::Write>( } } -fn escape_control_chars(s: &str) -> Result { - match s { - "\\0" => Ok(0), - "\\a" => Ok(7), - "\\b" => Ok(8), - "\\t" => Ok(9), - "\\n" => Ok(10), - "\\v" => Ok(11), - "\\f" => Ok(12), - "\\r" => Ok(13), - _ => Utf16Char::from_str_start(s).map(|c| *c.0.to_array().first().unwrap()), - } -} - fn parse_args( mut env: Environment, args: &[bril_rs::Argument], @@ -757,7 +747,7 @@ fn parse_args( bril_rs::Type::Pointer(..) => unreachable!(), bril_rs::Type::Char => escape_control_chars(inputs.get(index).unwrap().as_ref()) .map_or_else( - |_| Err(InterpError::NotOneChar), + || Err(InterpError::NotOneChar), |c| { env.set(*arg_as_num, Value::Char(c)); Ok(()) From f42bc9e3d0c39486b90abd1ecb3da9a1b3098d8d Mon Sep 17 00:00:00 2001 From: Patrick LaFontaine <32135464+Pat-Lafon@users.noreply.github.com> Date: Thu, 29 Jun 2023 13:04:13 -0700 Subject: [PATCH 4/8] better fixes for glob ignoring --- bril-rs/brillvm/Makefile | 4 ++-- brilift/Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bril-rs/brillvm/Makefile b/bril-rs/brillvm/Makefile index f551b01e0..c4c1c9109 100644 --- a/bril-rs/brillvm/Makefile +++ b/bril-rs/brillvm/Makefile @@ -2,13 +2,13 @@ TESTS := ../../test/interp/core/*.bril \ ../../test/interp/float/*.bril \ ../../test/interp/ssa/*.bril \ ../../test/interp/mem/*.bril \ - ../../test/interp/mixed/*.bril \ + ../../test/interp/mixed/*[^r].bril # A hack to exclude store-char.bril by excluding any file ending in r.bril # Currently ignoring the Cholesky benchmark because of (probably) a floating point rounding bug. BENCHMARKS := ../../benchmarks/core/*.bril \ ../../benchmarks/float/*.bril \ ../../benchmarks/mem/*.bril \ - ../../benchmarks/mixed/[!cholesky]*.bril + ../../benchmarks/mixed/[^c]*.bril # A hack to exclude cholesky.bril by excluding any file ending in r.bril clean: cargo clean diff --git a/brilift/Makefile b/brilift/Makefile index 3d2e63d96..68ee4d78a 100644 --- a/brilift/Makefile +++ b/brilift/Makefile @@ -9,7 +9,7 @@ endif # Brilift only supports core Bril for now, so we select those tests & # benchmarks. -TESTS := ../test/interp/core/*.bril ../test/interp/float/*.bril ../test/interp/mem/*.bril ../test/interp/mixed/store-float.bril +TESTS := ../test/interp/core/*.bril ../test/interp/float/*.bril ../test/interp/mem/*.bril ../test/interp/mixed/*[^r].bril # A hack to exclude store-char.bril by excluding any file ending in r.bril BENCHMARKS := ../benchmarks/core/*.bril ../benchmarks/float/*.bril ../benchmarks/mem/*.bril ../benchmarks/mixed/*.bril CFLAGS := $(if $(TARGET),-target $(TARGET)) From 8a3df7d37144970fd30e56fe1cec57a58c01f3a7 Mon Sep 17 00:00:00 2001 From: Patrick LaFontaine <32135464+Pat-Lafon@users.noreply.github.com> Date: Thu, 29 Jun 2023 13:04:45 -0700 Subject: [PATCH 5/8] Support emojis properly --- bril-rs/bril2json/src/lib.rs | 2 +- bril-rs/brild/src/lib.rs | 2 ++ test/interp-error/char-error/badconversion.bril | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bril-rs/bril2json/src/lib.rs b/bril-rs/bril2json/src/lib.rs index d921841d6..032a69b67 100644 --- a/bril-rs/bril2json/src/lib.rs +++ b/bril-rs/bril2json/src/lib.rs @@ -24,7 +24,7 @@ pub fn escape_control_chars(s: &str) -> Option { "\\v" => Some('\u{000B}'), "\\f" => Some('\u{000C}'), "\\r" => Some('\u{000D}'), - s if s.len() == 1 => s.chars().next(), + s if s.chars().count() == 1 => s.chars().next(), _ => None, } } diff --git a/bril-rs/brild/src/lib.rs b/bril-rs/brild/src/lib.rs index 92de3babe..4e92ad0bb 100644 --- a/bril-rs/brild/src/lib.rs +++ b/bril-rs/brild/src/lib.rs @@ -2,6 +2,8 @@ #![warn(missing_docs)] #![doc = include_str!("../README.md")] #![allow(clippy::module_name_repetitions)] +// todo until multiple versions of some really far down dependency are fixed +#![allow(clippy::multiple_crate_versions)] #[doc(hidden)] pub mod cli; diff --git a/test/interp-error/char-error/badconversion.bril b/test/interp-error/char-error/badconversion.bril index 7937f13d5..e6b742382 100644 --- a/test/interp-error/char-error/badconversion.bril +++ b/test/interp-error/char-error/badconversion.bril @@ -1,6 +1,6 @@ @main { i: int = const 56193; - + c: char = int2char i; print c; From 14337077e84c1035fa95da7d1b92a35a6f22a784 Mon Sep 17 00:00:00 2001 From: Patrick LaFontaine <32135464+Pat-Lafon@users.noreply.github.com> Date: Thu, 29 Jun 2023 13:27:50 -0700 Subject: [PATCH 6/8] Try to handle posix-isms in run.sh --- brilift/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brilift/run.sh b/brilift/run.sh index 07d1b6bf1..c932b849f 100755 --- a/brilift/run.sh +++ b/brilift/run.sh @@ -10,7 +10,7 @@ set -e # OS=$(uname -m) -if [[ "$OS" == 'arm64' ]]; then +if [ "$OS" = 'arm64' ]; then export TARGET=x86_64-unknown-darwin-macho fi From a2a9d4ce97d2863e069911819a85f088e5f443e8 Mon Sep 17 00:00:00 2001 From: Patrick LaFontaine <32135464+Pat-Lafon@users.noreply.github.com> Date: Fri, 30 Jun 2023 10:32:30 -0700 Subject: [PATCH 7/8] Allow multiple versions of bitflags --- bril-rs/brillvm/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bril-rs/brillvm/src/lib.rs b/bril-rs/brillvm/src/lib.rs index 4419eb70b..255f2e2b9 100644 --- a/bril-rs/brillvm/src/lib.rs +++ b/bril-rs/brillvm/src/lib.rs @@ -2,6 +2,8 @@ #![allow(clippy::too_many_lines)] #![allow(clippy::needless_for_each)] #![doc = include_str!("../README.md")] +// When you run with --all-targets, you also get --target=all which includes --target=redox. This pulls in redox_sys via a chain of deps through inkwell-parking_lot which uses an older version of bitflags 1.3.2. Given backwards compatibility, it's going to be a very long time, if ever, that this gets updated(because of msrv changes). +#![allow(clippy::multiple_crate_versions)] #[doc(hidden)] pub mod cli; From daf59a710646e81d63a3bc4f7322970d6aa0581d Mon Sep 17 00:00:00 2001 From: Patrick LaFontaine <32135464+Pat-Lafon@users.noreply.github.com> Date: Mon, 3 Jul 2023 13:44:02 -0700 Subject: [PATCH 8/8] typo --- bril-rs/brillvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bril-rs/brillvm/Makefile b/bril-rs/brillvm/Makefile index c4c1c9109..a48e4929a 100644 --- a/bril-rs/brillvm/Makefile +++ b/bril-rs/brillvm/Makefile @@ -8,7 +8,7 @@ TESTS := ../../test/interp/core/*.bril \ BENCHMARKS := ../../benchmarks/core/*.bril \ ../../benchmarks/float/*.bril \ ../../benchmarks/mem/*.bril \ - ../../benchmarks/mixed/[^c]*.bril # A hack to exclude cholesky.bril by excluding any file ending in r.bril + ../../benchmarks/mixed/[^c]*.bril # A hack to exclude cholesky.bril by excluding any file starting in c. clean: cargo clean