diff --git a/lints/bump_seed_canonicalization/README.md b/lints/bump_seed_canonicalization/README.md index 4a89f1b..8053773 100644 --- a/lints/bump_seed_canonicalization/README.md +++ b/lints/bump_seed_canonicalization/README.md @@ -1,13 +1,18 @@ # bump_seed_canonicalization **What it does:** + Finds uses of solana_program::pubkey::PubKey::create_program_address that do not check the bump_seed **Why is this bad?** + Generally for every seed there should be a canonical address, so the user should not be able to pick the bump_seed, since that would result in a different address. +See https://github.com/crytic/building-secure-contracts/tree/master/not-so-smart-contracts/solana/improper_pda_validation + **Known problems:** + False positives, since the bump_seed check may be within some other function (does not trace through function calls). The bump seed may be also be safely stored in an account but passed from another function. @@ -16,3 +21,23 @@ False negatives, since our analysis is not path-sensitive (the bump_seed check m occur in all possible execution paths) **Example:** + +See https://github.com/coral-xyz/sealevel-attacks/blob/master/programs/7-bump-seed-canonicalization/insecure/src/lib.rs for an insecure example + +Use instead: + +See https://github.com/coral-xyz/sealevel-attacks/blob/master/programs/7-bump-seed-canonicalization/recommended/src/lib.rs for recommended way to use bump. + +**How the lint is implemented:** + +- For every function containing calls to `solana_program::pubkey::Pubkey::create_program_address` +- find the `bump` location from the first argument to `create_program_address` call. + - first argument is the seeds array(`&[&[u8]]`). In general, the seeds are structured with bump as last element: + `&[seed1, seed2, ..., &[bump]]` e.g `&[b"vault", &[bump]]`. + - find the locations of bump. + - If bump is assigned by accessing a struct field + - if bump is assigned from a struct implementing `AnchorDeserialize` trait + - report a warning to use `#[account(...)` macro + - else report "bump may not be constrainted" warning + - else check if the bump is checked using a comparison operation + - report a warning if the bump is not checked diff --git a/lints/bump_seed_canonicalization/src/lib.rs b/lints/bump_seed_canonicalization/src/lib.rs index 710107e..aa7787f 100644 --- a/lints/bump_seed_canonicalization/src/lib.rs +++ b/lints/bump_seed_canonicalization/src/lib.rs @@ -24,13 +24,18 @@ extern crate rustc_middle; dylint_linting::declare_late_lint! { /// **What it does:** + /// /// Finds uses of solana_program::pubkey::PubKey::create_program_address that do not check the bump_seed /// /// **Why is this bad?** + /// /// Generally for every seed there should be a canonical address, so the user should not be /// able to pick the bump_seed, since that would result in a different address. + /// + /// See https://github.com/crytic/building-secure-contracts/tree/master/not-so-smart-contracts/solana/improper_pda_validation /// /// **Known problems:** + /// /// False positives, since the bump_seed check may be within some other function (does not /// trace through function calls). The bump seed may be also be safely stored in an account but /// passed from another function. @@ -39,6 +44,26 @@ dylint_linting::declare_late_lint! { /// occur in all possible execution paths) /// /// **Example:** + /// + /// See https://github.com/coral-xyz/sealevel-attacks/blob/master/programs/7-bump-seed-canonicalization/insecure/src/lib.rs for an insecure example + /// + /// Use instead: + /// + /// See https://github.com/coral-xyz/sealevel-attacks/blob/master/programs/7-bump-seed-canonicalization/recommended/src/lib.rs for recommended way to use bump. + /// + /// **How the lint is implemented:** + /// + /// - For every function containing calls to `solana_program::pubkey::Pubkey::create_program_address` + /// - find the `bump` location from the first argument to `create_program_address` call. + /// - first argument is the seeds array(`&[&[u8]]`). In general, the seeds are structured with bump as last element: + /// `&[seed1, seed2, ..., &[bump]]` e.g `&[b"vault", &[bump]]`. + /// - find the locations of bump. + /// - If bump is assigned by accessing a struct field + /// - if bump is assigned from a struct implementing `AnchorDeserialize` trait + /// - report a warning to use `#[account(...)` macro + /// - else report "bump may not be constrainted" warning + /// - else check if the bump is checked using a comparison operation + /// - report a warning if the bump is not checked pub BUMP_SEED_CANONICALIZATION, Warn, "Finds calls to create_program_address that do not check the bump_seed" @@ -48,10 +73,13 @@ impl<'tcx> LateLintPass<'tcx> for BumpSeedCanonicalization { fn check_body(&mut self, cx: &LateContext<'tcx>, body: &'tcx Body<'tcx>) { let hir_map = cx.tcx.hir(); let body_did = hir_map.body_owner_def_id(body.id()).to_def_id(); + // The body is the body of function whose mir is available + // fn_like includes fn, const fn, async fn but not closures. if !cx.tcx.def_kind(body_did).is_fn_like() || !cx.tcx.is_mir_available(body_did) { return; } let body_mir = cx.tcx.optimized_mir(body_did); + // list of block id and the terminator of the basic blocks in the CFG let terminators = body_mir .basic_blocks .iter_enumerated() @@ -59,6 +87,7 @@ impl<'tcx> LateLintPass<'tcx> for BumpSeedCanonicalization { for (_idx, (block_id, terminator)) in terminators.enumerate() { if_chain! { if let t = terminator.as_ref().unwrap(); + // The terminator is call to a function if let TerminatorKind::Call { func: func_operand, args, @@ -71,13 +100,16 @@ impl<'tcx> LateLintPass<'tcx> for BumpSeedCanonicalization { then { // Static call let callee_did = *def_id; + // called function is `solana_program::pubkey::Pubkey::create_program_address` if match_def_path( cx, callee_did, &paths::SOLANA_PROGRAM_CREATE_PROGRAM_ADDRESS, ) { + // get the seeds argument; seeds is the first argument let seed_arg = &args[0]; if let Operand::Move(p) = seed_arg { + // find all alias of bump in the seeds array: &[seed1, ..., &[bump]]. let (dataflow_state, likely_bump_places): ( BackwardDataflowState, Vec, @@ -85,6 +117,7 @@ impl<'tcx> LateLintPass<'tcx> for BumpSeedCanonicalization { let likely_bump_locals: Vec = likely_bump_places.iter().map(|pl| pl.local).collect(); match dataflow_state { + // found the location of bump BackwardDataflowState::Bump => { // If the bump seed is just passed in but didn't come from a // structure, look for equality checks that might show that @@ -102,6 +135,8 @@ impl<'tcx> LateLintPass<'tcx> for BumpSeedCanonicalization { ); } } + // bump value is accessed from a struct which does not implement AnchorDeserialize trait + // non anchor struct => not part of state BackwardDataflowState::NonAnchorStructContainingBump => { // Value came from a non-anchor struct. We will warn here // just to be safe, since we can't tell if this bump seed @@ -113,6 +148,9 @@ impl<'tcx> LateLintPass<'tcx> for BumpSeedCanonicalization { "Bump seed comes from structure, ensure it is constrained to a single value and not user-controlled.", ); } + // TODO: Should we report this??? + // bump for one anchor account might be stored in a different account, it might not be + // always possible to use the #[account(...)] macro BackwardDataflowState::AnchorStructContainingBump => { // Value came from an anchor struct. They should be using // the account macro for this. @@ -133,6 +171,7 @@ impl<'tcx> LateLintPass<'tcx> for BumpSeedCanonicalization { } } +/// Return true if the `deser_ty` implements `anchor::AccountDeserialize` trait else false fn is_anchor_account_struct<'tcx>(cx: &LateContext<'tcx>, deser_ty: Ty<'tcx>) -> bool { let mut account_deserialize = false; if let Some(anchor_trait_id) = get_trait_def_id(cx, &paths::ANCHOR_LANG_ACCOUNT_DESERIALIZE) { @@ -151,6 +190,8 @@ enum BackwardDataflowState { } impl BumpSeedCanonicalization { + /// Given the seeds_arg, a location passed to first argument of `create_program_address`, + /// find all locations/alias of bump: &[seed1, .., &[bump]] fn find_bump_seed_for_seed_array<'tcx>( cx: &LateContext<'tcx>, body: &'tcx mir::Body<'tcx>, @@ -178,11 +219,14 @@ impl BumpSeedCanonicalization { Operand::Copy(rvalue_place) | Operand::Move(rvalue_place), _, ) => { + // if seed_arg = x then trace for assignments of x seeds_arg = rvalue_place; + // state is Bump => seed_arg stores the bump if state == BackwardDataflowState::Bump { likely_bump_seed_aliases.push(*rvalue_place); } if_chain! { + // if seed_arg stores bump and rvalue is such that `x.y` (field access) if state == BackwardDataflowState::Bump; if let Some(proj) = rvalue_place.iter_projections().find_map(|(_, proj)| { @@ -193,6 +237,7 @@ impl BumpSeedCanonicalization { }); if let ProjectionElem::Field(_, _) = proj; then { + // if the bump is accessed from a Anchor struct (representing program state) state = if is_anchor_account_struct( cx, Place::ty_from(rvalue_place.local, &[], body, cx.tcx) @@ -206,18 +251,26 @@ impl BumpSeedCanonicalization { } } } + // rhs is array Rvalue::Aggregate(box AggregateKind::Array(_), elements) => match state { BackwardDataflowState::SeedsArray if elements.len() > 1 => { + // if seeds_arg stores the `seeds` location, find the location of bump + // bump is the last element: [seed1, seed2, ..., bump] if let Operand::Move(pl) = elements.last().unwrap() { + // update the seeds_arg to point to pl and update the state seeds_arg = pl; state = BackwardDataflowState::FirstSeed; } } BackwardDataflowState::FirstSeed if elements.len() == 1 => { + // seeds_arg points to bump array [ seed1, ..., &[bump]. seeds_arg stores + // the location of &[bump]. update it to store the location of bump. if let Operand::Move(pl) = &elements[0] { + // store the location of bump seeds_arg = pl; likely_bump_seed_aliases.push(*seeds_arg); + // seeds_arg is a location of bump state = BackwardDataflowState::Bump; } } @@ -254,6 +307,9 @@ impl BumpSeedCanonicalization { return true; } } + // look for chain of assign statements whose value is eventually assigned to the `search_place` and + // see if any of the intermediate local is in the search_list. + // TODO: move this and ArbitraryCPI::is_moved_from to utils. loop { for stmt in body.basic_blocks[cur_block].statements.iter().rev() { match &stmt.kind { @@ -290,6 +346,8 @@ impl BumpSeedCanonicalization { false } + // This function takes the list of bump_locals and a starting block, and searches for a + // check elsewhere in the Body that would compare the program_id with something else. fn is_bump_seed_checked<'tcx>( cx: &LateContext, body: &'tcx mir::Body<'tcx>, @@ -298,11 +356,14 @@ impl BumpSeedCanonicalization { for (block_id, block) in body.basic_blocks.iter_enumerated() { for stmt in &block.statements { if_chain! { + // look for assign statements if let StatementKind::Assign(box (_, rvalue)) = &stmt.kind; + // check if rhs is comparison between bump and some other value. if let Rvalue::BinaryOp(BinOp::Eq | BinOp::Ne, box (op0, op1)) = rvalue; if let Operand::Copy(arg0_pl) | Operand::Move(arg0_pl) = op0; if let Operand::Copy(arg1_pl) | Operand::Move(arg1_pl) = op1; then { + // Check if one of the args in comparison came from a local of bump if Self::is_moved_from(cx, body, block_id, arg0_pl, bump_locals) || Self::is_moved_from(cx, body, block_id, arg1_pl, bump_locals) { diff --git a/lints/insecure_account_close/README.md b/lints/insecure_account_close/README.md index dfc2279..364b41b 100644 --- a/lints/insecure_account_close/README.md +++ b/lints/insecure_account_close/README.md @@ -1,5 +1,34 @@ # insecure_account_close -**What it does:** Checks for attempts to close an account by setting its lamports to 0 but -not also clearing its data. See: -https://docs.solana.com/developing/programming-model/transactions#multiple-instructions-in-a-single-transaction +**What it does:** + +Checks for attempts to close an account by setting its lamports to 0 but +not also clearing its data. + +**Why is this bad?** + +See: https://docs.solana.com/developing/programming-model/transactions#multiple-instructions-in-a-single-transaction + +> An example of where this could be a problem is if a token program, upon transferring the token out of an account, sets the account's lamports to zero, assuming it will be deleted by the runtime. If the program does not zero out the account's data, a malicious user could trail this instruction with another that transfers the tokens a second time. + +**Known problems:** + +None + +**Example:** + +See https://github.com/coral-xyz/sealevel-attacks/tree/master/programs/9-closing-accounts for examples of insecure, secure and recommended + approach to close an account. + +**How the lint is implemented:** + +- For every expression like `(*(*some_expr).lamports.borrow_mut()) = 0;`; assigning `0` to account's lamports +- If the body enclosing the expression `is_force_defund`, ignore the expression + - The body contains expressions `some_expr.copy_from_slice(&another_expr[0..8])` and comparison expression + comparing an `[u8; 8]` value. +- Else If the body contains a manual clear of the account data + - If the body has a for loop like pattern and the loop body has an expression assigning zero + - Assume the loop is clearing the account data and the expression is safe +- Else + - report the expression as vulnerable + diff --git a/lints/insecure_account_close/src/lib.rs b/lints/insecure_account_close/src/lib.rs index 9c721f9..6d4bd52 100644 --- a/lints/insecure_account_close/src/lib.rs +++ b/lints/insecure_account_close/src/lib.rs @@ -15,9 +15,38 @@ use rustc_middle::ty::{TyKind, UintTy}; use solana_lints::utils::visit_expr_no_bodies; dylint_linting::declare_late_lint! { - /// **What it does:** Checks for attempts to close an account by setting its lamports to 0 but - /// not also clearing its data. See: - /// https://docs.solana.com/developing/programming-model/transactions#multiple-instructions-in-a-single-transaction + /// **What it does:** + /// + /// Checks for attempts to close an account by setting its lamports to `0` but + /// not also clearing its data. + /// + /// **Why is this bad?** + /// + /// See: https://docs.solana.com/developing/programming-model/transactions#multiple-instructions-in-a-single-transaction + /// + /// > An example of where this could be a problem is if a token program, upon transferring the token out of an account, sets the account's lamports to zero, assuming it will be deleted by the runtime. If the program does not zero out the account's data, a malicious user could trail this instruction with another that transfers the tokens a second time. + /// + /// **Known problems:** + /// + /// None + /// + /// **Example:** + /// + /// See https://github.com/coral-xyz/sealevel-attacks/tree/master/programs/9-closing-accounts for examples of insecure, secure and recommended + /// approach to close an account. + /// + /// **How the lint is implemented:** + /// + /// - For every expression like `(*(*some_expr).lamports.borrow_mut()) = 0;`; assigning `0` to account's lamports + /// - If the body enclosing the expression `is_force_defund`, ignore the expression + /// - The body contains expressions `some_expr.copy_from_slice(&another_expr[0..8])` and comparison expression + /// comparing an `[u8; 8]` value. + /// - Else If the body contains a manual clear of the account data + /// - If the body has a for loop like pattern and the loop body has an expression assigning zero + /// - Assume the loop is clearing the account data and the expression is safe + /// - Else + /// - report the expression as vulnerable + /// pub INSECURE_ACCOUNT_CLOSE, Warn, "attempt to close an account without also clearing its data" @@ -26,11 +55,15 @@ dylint_linting::declare_late_lint! { impl<'tcx> LateLintPass<'tcx> for InsecureAccountClose { fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) { if_chain! { + // if expr is `(*(*some_expr).lamports.borrow_mut()) = 0;` if is_account_close(expr); let body_owner_hir_id = cx.tcx.hir().enclosing_body_owner(expr.hir_id); let body_id = cx.tcx.hir().body_owned_by(body_owner_hir_id); let body = cx.tcx.hir().body(body_id); + // if the body does not contain `some_expr.copy_from_slice(&another_expr[0..8])` and + // comparison of `[u8; 8]` value. if !is_force_defund(cx, body); + // if the body does not contain a for loop with an expression assigning zero. (Assume clearing data) if !contains_manual_clear(body); then { span_lint( @@ -44,6 +77,7 @@ impl<'tcx> LateLintPass<'tcx> for InsecureAccountClose { } } +// Return true if expr is `(*(*some_expr).lamports.borrow_mut()) = 0;` fn is_account_close(expr: &Expr<'_>) -> bool { if_chain! { if let Some(place) = is_zero_assignment(expr); @@ -69,6 +103,7 @@ fn is_force_defund<'tcx>(cx: &LateContext<'tcx>, body: &'tcx Body<'tcx>) -> bool contains_initial_eight_byte_copy_slice(body) && contains_eight_byte_array_comparison(cx, body) } +/// Return true if the body has `some_expr.copy_from_slice(&another_expr[0..8])` expression fn contains_initial_eight_byte_copy_slice<'tcx>(body: &'tcx Body<'tcx>) -> bool { visit_expr_no_bodies(body.value, |expr| { is_initial_eight_byte_copy_from_slice(expr).then_some(()) @@ -76,6 +111,7 @@ fn contains_initial_eight_byte_copy_slice<'tcx>(body: &'tcx Body<'tcx>) -> bool .is_some() } +/// Return true if expr matches `some_expr.copy_from_slice(&another_expr[0..8])` fn is_initial_eight_byte_copy_from_slice(expr: &Expr<'_>) -> bool { if_chain! { if let ExprKind::MethodCall(method_name, _, args, _) = expr.kind; @@ -98,6 +134,8 @@ fn is_initial_eight_byte_copy_from_slice(expr: &Expr<'_>) -> bool { } } + +/// Return true if the body contains an comparison expr and one of the values compared is array: [u8; 8] fn contains_eight_byte_array_comparison<'tcx>( cx: &LateContext<'tcx>, body: &'tcx Body<'tcx>, @@ -108,6 +146,8 @@ fn contains_eight_byte_array_comparison<'tcx>( .is_some() } + +/// Return true if the expr is a comparison and one of the values is array type: [u8; 8] fn is_eight_byte_array_comparison<'tcx>(cx: &LateContext<'tcx>, expr: &Expr<'tcx>) -> bool { if_chain! { if let ExprKind::Binary(op, left, right) = expr.kind; @@ -121,6 +161,7 @@ fn is_eight_byte_array_comparison<'tcx>(cx: &LateContext<'tcx>, expr: &Expr<'tcx } } +/// Return true if type of the expr is an Array of type u8 and its length is 8 fn is_eight_byte_array<'tcx>(cx: &LateContext<'tcx>, expr: &Expr<'tcx>) -> bool { let ty = cx.typeck_results().expr_ty(expr); if_chain! { @@ -136,13 +177,17 @@ fn is_eight_byte_array<'tcx>(cx: &LateContext<'tcx>, expr: &Expr<'tcx>) -> bool } } +/// Return true if the Body contains a for loop that zero assignment fn contains_manual_clear<'tcx>(body: &'tcx Body<'tcx>) -> bool { visit_expr_no_bodies(body.value, |expr| is_manual_clear(expr).then_some(())).is_some() } +/// Return true is `expr` has a pattern for a `for` loop and the loop contains zero assignment fn is_manual_clear(expr: &Expr<'_>) -> bool { if_chain! { + // if expr has the pattern for a `for` loop if let Some(higher::ForLoop { body, .. }) = higher::ForLoop::hir(expr); + // check if the body of the loop has `x = 0` expression for some `x` if contains_zero_assignment(body); then { true @@ -152,10 +197,12 @@ fn is_manual_clear(expr: &Expr<'_>) -> bool { } } +/// Return true if any of the expressions contains `x = 0` type assignment fn contains_zero_assignment<'tcx>(expr: &'tcx Expr<'tcx>) -> bool { visit_expr_no_bodies(expr, is_zero_assignment).is_some() } +/// Return Some(place) if the expr is an assignment of `0` literal to `place` else None fn is_zero_assignment<'tcx>(expr: &'tcx Expr<'tcx>) -> Option<&'tcx Expr<'tcx>> { if_chain! { if let ExprKind::Assign(place, value, _) = expr.kind; diff --git a/lints/type_cosplay/README.md b/lints/type_cosplay/README.md index 912ee6c..13bb4e7 100644 --- a/lints/type_cosplay/README.md +++ b/lints/type_cosplay/README.md @@ -1,6 +1,8 @@ # type_cosplay -**What it does:** Checks that all deserialized types have a proper discriminant so that +**What it does:** + +Checks that all deserialized types have a proper discriminant so that all types are guaranteed to deserialize differently. Instead of searching for equivalent types and checking to make sure those specific @@ -18,13 +20,16 @@ discriminant. If it is the only type that is deserialized, then all struct types are guaranteed to be unique since the program will have to match a specific variant. **Why is this bad?** + The type cosplay issue is when one account type can be substituted for another account type. This occurs when a type deserializes exactly the same as another type, such that you can't tell the difference between deserialized type `X` and deserialized type `Y`. This allows a malicious user to substitute `X` for `Y` or vice versa, and the code may perform unauthorized actions with the bytes. -**Known problems:** In the case when only one enum is deserialized, this lint by default +**Known problems:** + +In the case when only one enum is deserialized, this lint by default regards that as secure. However, this is not always the case. For example, if the program defines another enum and serializes, but never deserializes it, a user could create this enum, and, if it deserializes the same as the first enum, then this may be a possible vulnerability. @@ -109,3 +114,26 @@ an explicit check to make sure the discriminant is as expected. This example fixes both the insecure and insecure-2 examples. It is secure because it only deserializes from a single enum, and that enum encapsulates all of the user-defined types. Since enums contain an implicit discriminant, this program will always be secure as long as all types are defined under the enum. + +**How the lint is implemented:** + +- Find call expressions which has an arg that accesses account data + - Arg expression contains `x.data`; `x` is of type `AccountInfo` +- Get the type that the function was called on, ie X in X::call() +- if `X` implements `anchor_lang::Discriminator` trait but the function called is not `try_deserialize` + - warn to use `try_deserialize` or to account for type's discriminator +- else if the function called is Borsh `try_from_slice`, collect the deserialized type +- Repeat the above for all call expressions and collect all deserialized types; `X` from `X::try_from_slice()` expressions. +- If number of different kinds of types deserialized is more than `1`, i.e the code deserializes `Enum` type, `Struct` type, etc. + - warn to either deserialize from only structs or only an enum +- If the deserialized types are all enum + - If number of deserialized enums are more than `1` + - warn to use single enum that contains all type definitions + - Else assume that the single enum is safe and do not report +- Else the deserialized types are structs + - For each deserialized type + - If the struct has first field of type enum and number of variants of the enum are more than the + number of deserialized types + - The struct has proper discriminant + - Else warn to add an enum with at least as many variants as there are deserialized types. + diff --git a/lints/type_cosplay/src/lib.rs b/lints/type_cosplay/src/lib.rs index d0a99ee..808e858 100644 --- a/lints/type_cosplay/src/lib.rs +++ b/lints/type_cosplay/src/lib.rs @@ -23,7 +23,9 @@ use rustc_span::{def_id::DefId, Span}; use solana_lints::{paths, utils::visit_expr_no_bodies}; dylint_linting::impl_late_lint! { - /// **What it does:** Checks that all deserialized types have a proper discriminant so that + /// **What it does:** + /// + /// Checks that all deserialized types have a proper discriminant so that /// all types are guaranteed to deserialize differently. /// /// Instead of searching for equivalent types and checking to make sure those specific @@ -41,13 +43,16 @@ dylint_linting::impl_late_lint! { /// are guaranteed to be unique since the program will have to match a specific variant. /// /// **Why is this bad?** + /// /// The type cosplay issue is when one account type can be substituted for another account type. /// This occurs when a type deserializes exactly the same as another type, such that you can't /// tell the difference between deserialized type `X` and deserialized type `Y`. This allows a /// malicious user to substitute `X` for `Y` or vice versa, and the code may perform unauthorized /// actions with the bytes. /// - /// **Known problems:** In the case when only one enum is deserialized, this lint by default + /// **Known problems:** + /// + /// In the case when only one enum is deserialized, this lint by default /// regards that as secure. However, this is not always the case. For example, if the program /// defines another enum and serializes, but never deserializes it, a user could create this enum, /// and, if it deserializes the same as the first enum, then this may be a possible vulnerability. @@ -132,6 +137,29 @@ dylint_linting::impl_late_lint! { /// This example fixes both the insecure and insecure-2 examples. It is secure because it only deserializes /// from a single enum, and that enum encapsulates all of the user-defined types. Since enums contain /// an implicit discriminant, this program will always be secure as long as all types are defined under the enum. + /// + /// **How the lint is implemented:** + /// + /// - Find call expressions which has an arg that accesses account data + /// - Arg expression contains `x.data`; `x` is of type `AccountInfo` + /// - Get the type that the function was called on, ie X in X::call() + /// - if `X` implements `anchor_lang::Discriminator` trait but the function called is not `try_deserialize` + /// - warn to use `try_deserialize` or to account for type's discriminator + /// - else if the function called is Borsh `try_from_slice`, collect the deserialized type + /// - Repeat the above for all call expressions and collect all deserialized types; `X` from `X::try_from_slice()` expressions. + /// - If number of different kinds of types deserialized is more than `1`, i.e the code deserializes `Enum` type, `Struct` type, etc. + /// - warn to either deserialize from only structs or only an enum + /// - If the deserialized types are all enum + /// - If number of deserialized enums are more than `1` + /// - warn to use single enum that contains all type definitions + /// - Else assume that the single enum is safe and do not report + /// - Else the deserialized types are structs + /// - For each deserialized type + /// - If the struct has first field of type enum and number of variants of the enum are more than the + /// number of deserialized types + /// - The struct has proper discriminant + /// - Else warn to add an enum with at least as many variants as there are deserialized types. + /// pub TYPE_COSPLAY, Warn, "type is equivalent to another type", @@ -153,6 +181,7 @@ impl<'tcx> LateLintPass<'tcx> for TypeCosplay { // smoelius: I updated the `recommended-2` test so that the call contains a reference to // `AccountInfo.data`. But @victor-wei126's comment is still relevant in that we need a // more general solution for finding references to `AccountInfo.data`. + // do any of the args access `x.data` where x is of type `AccountInfo` if args_exprs.iter().any(|arg| { visit_expr_no_bodies(arg, |expr| contains_data_field_reference(cx, expr)) }); @@ -165,11 +194,14 @@ impl<'tcx> LateLintPass<'tcx> for TypeCosplay { let middle_ty = cx.tcx.type_of(def_id); then { if_chain! { + // the type implements `anchor_lang::Discriminator` if let Some(trait_did) = get_trait_def_id(cx, &paths::ANCHOR_LANG_DISCRIMINATOR); if implements_trait(cx, middle_ty, trait_did, &[]); + // But the function is not `try_deserialize` if let Some(def_id) = cx.typeck_results().type_dependent_def_id(fnc_expr.hir_id); if !match_def_path(cx, def_id, &paths::ANCHOR_LANG_TRY_DESERIALIZE); then { + // warn to use `try_deserialize` span_lint_and_help( cx, TYPE_COSPLAY, @@ -186,6 +218,7 @@ impl<'tcx> LateLintPass<'tcx> for TypeCosplay { if let MiddleTyKind::Adt(adt_def, _) = middle_ty.kind() { let adt_kind = adt_def.adt_kind(); let def_id = adt_def.did(); + // store the deserialized type if let Some(vec) = self.deser_types.get_mut(&adt_kind) { vec.push((def_id, ty.span)); } else { @@ -209,6 +242,7 @@ impl<'tcx> LateLintPass<'tcx> for TypeCosplay { _ => check_structs_have_discriminant(cx, v), // NOTE: also catches unions } } else if self.deser_types.len() > 1 { + // Number of AdtKind's of different deserialization is > 1 // Retrieve spans: iter through map, grab first elem of each key-pair, then get span let mut spans = vec![]; self.deser_types.iter().for_each(|(_, v)| { @@ -226,6 +260,7 @@ impl<'tcx> LateLintPass<'tcx> for TypeCosplay { } } +/// Return true if the expr is Borsh `try_from_slice` else false fn is_deserialize_function(cx: &LateContext<'_>, expr: &Expr<'_>) -> bool { match cx.typeck_results().type_dependent_def_id(expr.hir_id) { Some(def_id) => match_def_path(cx, def_id, &paths::BORSH_TRY_FROM_SLICE), @@ -233,6 +268,7 @@ fn is_deserialize_function(cx: &LateContext<'_>, expr: &Expr<'_>) -> bool { } } +/// Return true if the `expr` accesses `.data` on a value whose type is `solana_program::account_info::AccountInfo` fn contains_data_field_reference(cx: &LateContext<'_>, expr: &Expr<'_>) -> bool { if_chain! { if let ExprKind::Field(obj_expr, ident) = expr.kind; @@ -247,6 +283,7 @@ fn contains_data_field_reference(cx: &LateContext<'_>, expr: &Expr<'_>) -> bool } } +// if number of enums are 1 then ignore otherwise warn the first two instances found fn check_enums(cx: &LateContext<'_>, enums: &Vec<(DefId, Span)>) { #[allow(clippy::comparison_chain)] if enums.len() > 1 { @@ -267,6 +304,7 @@ fn check_enums(cx: &LateContext<'_>, enums: &Vec<(DefId, Span)>) { } } +/// Check each of the struct has first field of type enum with number of variants > `types.len()` fn check_structs_have_discriminant(cx: &LateContext<'_>, types: &Vec<(DefId, Span)>) { let num_structs = types.len(); types @@ -278,10 +316,12 @@ fn check_structs_have_discriminant(cx: &LateContext<'_>, types: &Vec<(DefId, Spa /// the number of variants at least the number of deserialized structs. Further the discriminant should /// be the first field in the adt. fn has_discriminant(cx: &LateContext, adt: AdtDef, num_struct_types: usize, span: Span) { + // get the type of the first field let variant = adt.variants().get(Idx::new(0)).unwrap(); let first_field_def = &variant.fields[0]; let ty = cx.tcx.type_of(first_field_def.did); if_chain! { + // the type is an enum with variants > `num_struct_types``. if let MiddleTyKind::Adt(adt_def, _) = ty.kind(); if adt_def.is_enum(); if adt_def.variants().len() >= num_struct_types;