diff --git a/.gitignore b/.gitignore index 5dbbffb..c54a17b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,22 @@ -target/ -dbt_modules/ -logs/ .env .notes dbt_docs.sh dbt_docs -.DS_Store + + + +target/ +dbt_modules/ +# newer versions of dbt use this directory instead of dbt_modules for test dependencies +dbt_packages/ +logs/ + +.venv/ +.python-version + +# Visual Studio Code files +*/.vscode +*.code-workspace +.history/ +**/.DS_Store +.vscode/ diff --git a/dbt_project.yml b/dbt_project.yml index c20a607..8c533f3 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -6,15 +6,15 @@ version: "1.0.0" config-version: 2 # This setting configures which "profile" dbt uses for this project. -profile: "default" +profile: "harmony" # These configurations specify where dbt should look for different types of files. # The `source-paths` config, for example, states that models in this project can be # found in the "models/" directory. You probably won't need to change these! -source-paths: ["models"] +model-paths: ["models"] analysis-paths: ["analysis"] test-paths: ["tests"] -data-paths: ["data"] +seed-paths: ["data"] macro-paths: ["macros"] snapshot-paths: ["snapshots"] @@ -22,6 +22,7 @@ target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` - "target" - "dbt_modules" + - "dbt_packages" # Configuring models # Full documentation: https://docs.getdbt.com/docs/configuring-models @@ -47,3 +48,6 @@ models: seeds: +quote_columns: false + +vars: + "dbt_date:time_zone": GMT \ No newline at end of file diff --git a/macros/tests/sequence_gaps.sql b/macros/tests/sequence_gaps.sql new file mode 100644 index 0000000..9425003 --- /dev/null +++ b/macros/tests/sequence_gaps.sql @@ -0,0 +1,34 @@ +{% test sequence_gaps( + model, + partition_by, + column_name +) %} {# Generic test: returns one row for every place where consecutive column_name values (ordered ascending within partition_by) do not differ by exactly 1; gap is that difference minus 1 #} +{%- set partition_sql = partition_by | join(", ") -%} +{%- set previous_column = "prev_" ~ column_name -%} +WITH source AS ( + SELECT + {{ partition_sql +
"," if partition_sql }} + {{ column_name }}, + LAG( + {{ column_name }}, + 1 + ) over ( + {{ "PARTITION BY " ~ partition_sql if partition_sql }} + ORDER BY + {{ column_name }} ASC + ) AS {{ previous_column }} + FROM + {{ model }} +) +SELECT + {{ partition_sql + "," if partition_sql }} + {{ previous_column }}, + {{ column_name }}, + {{ column_name }} - {{ previous_column }} + - 1 AS gap +FROM + source +WHERE + {{ column_name }} - {{ previous_column }} <> 1 +ORDER BY + gap DESC {% endtest %} diff --git a/macros/tests/tx_gaps.sql b/macros/tests/tx_gaps.sql new file mode 100644 index 0000000..48fdcfd --- /dev/null +++ b/macros/tests/tx_gaps.sql @@ -0,0 +1,33 @@ +{% macro tx_gaps( + model + ) %} {# Returns one row per block whose tx_count in the blocks model differs from the number of distinct tx_hash values that model holds for the same block_id. model: relation exposing block_id and tx_hash. #} + WITH block_base AS ( + SELECT + block_id, + tx_count + FROM + {{ ref('blocks') }} + ), + model_name AS ( + SELECT + block_id, + COUNT( + DISTINCT tx_hash + ) AS model_tx_count + FROM + {{ model }} + GROUP BY + block_id + ) +SELECT + block_base.block_id, + tx_count, + model_name.block_id AS model_block_id, + model_tx_count +FROM + block_base + LEFT JOIN model_name + ON block_base.block_id = model_name.block_id +WHERE + tx_count <> COALESCE(model_tx_count, 0) {# a block with no rows in model leaves model_tx_count NULL after the LEFT JOIN; compare against 0 so those blocks are reported instead of silently dropped #} +{% endmacro %} diff --git a/models/Sushi/ez_sushi_swaps.sql b/models/Sushi/ez_sushi_swaps.sql new file mode 100644 index 0000000..2a2d4f3 --- /dev/null +++ b/models/Sushi/ez_sushi_swaps.sql @@ -0,0 +1,172 @@ +{{ config( + materialized = 'incremental', + persist_docs ={ "relation": true, + "columns": true }, + unique_key = 'log_id', + cluster_by = ['block_timestamp::DATE'], + copy_grants = true +) }} + + +with +swap_without_prices as +( +select +se.block_timestamp, +se.block_id as block_number, +se.tx_hash, +se.pool_address, +CASE wHEN se.amount0In <> 0 and se.amount1In <> 0 THEN amount1In / power(10, token1.decimals ) :: FLOAT + WHEN se.amount0In <> 0 THEN se.amount0In / power(10, token0.decimals)::float + WHEN se.amount1In <> 0 THEN se.amount1In/ power(10, token1.decimals)::float + END AS amount_in, +CASE + WHEN se.amount0Out <> 0 THEN se.amount0Out/
power(10, token0.decimals)::float + WHEN se.amount1Out <> 0 THEN se.amount1Out/ power(10, token1.decimals)::float + END as amount_out, +se.from_address as sender, +se.LOG_ID, +se.event_index, +CASE + WHEN se.amount0In <> 0 AND se.amount1In <> 0 THEN token1_address + WHEN se.amount0In <> 0 THEN token0_address + WHEN se.amount1In <> 0 THEN token1_address + END AS token_In, +CASE + WHEN se.amount0Out <> 0 THEN token0_address + WHEN se.amount1Out <> 0 THEN TOKEN1_ADDRESS + END AS token_out, +CASE + WHEN se.amount0In <> 0 AND se.amount1In <> 0 THEN token1_symbol + WHEN se.amount0In <> 0 THEN TOKEN0_SYMBOL + WHEN se.amount1In <> 0 THEN token1_symbol + END AS symbol_In, +CASE + WHEN se.amount0Out <> 0 THEN token0_symbol + WHEN se.amount1Out <> 0 THEN token1_symbol + END AS symbol_out, +se.TO_ADDRESS::string as tx_to, +se.ingested_at +from {{ ref('swaps') }} se --27,288,348 +left join {{ ref('tokens') }} token0 +on se.token0_address = token0.token_address +left join {{ ref('tokens') }} token1 +on se.TOKEN1_ADDRESS = token1.TOKEN_ADDRESS --27,288,348 +where 1 = 1 + +{% if is_incremental() %} {# incremental runs only reload swaps from two days before the newest block_timestamp already present in this table #} +AND block_timestamp >= ( + SELECT + MAX(block_timestamp) :: DATE - 2 + FROM + {{ this }} +) +{% endif %} +), + + + + + +ETH_prices as +( select token_address, + hour, + symbol, + avg(price) as price + from {{ source( + 'ethereum_db_sushi', + 'FACT_HOURLY_TOKEN_PRICES' + ) }} + + WHERE + 1 = 1 + +{% if is_incremental() %} +AND HOUR :: DATE IN ( + SELECT + DISTINCT block_timestamp :: DATE + FROM + swap_without_prices +) +{% else %} + AND HOUR :: DATE >= '2020-05-05' +{% endif %} + +group by token_address, + hour, + symbol), + + + +Harmony_Eth_crosstab as +(select + name, + symbol, +max (case + when platform_id = 'harmony-shard-0' then token_address + else '' end) as harmony_address, +max (case + when platform = 'ethereum' then token_address + else '' end) as eth_address {# NOTE(review): this branch filters on platform while the one above filters on platform_id - confirm both columns exist in MARKET_ASSET_METADATA and that the mix is intended #} +from {{ source( + 'symbols_cross_tab', + 'MARKET_ASSET_METADATA' + ) }} +group by 1,2 +having harmony_address <> ''
and eth_address <> '' +order by 1,2 +), + +Harmony_prices as +(select +ep.token_address, +ep.hour, +ep.symbol, +ep.price, +hec.harmony_Address as harmony_address +from Eth_prices ep +left join Harmony_Eth_crosstab hec +on ep.token_address = hec.eth_Address +) + +select +wp.block_timestamp, +wp.block_number, +wp.tx_hash, +wp.pool_address, +'Sushiswap' as platform, +wp.event_index, +wp.amount_in, +wp.amount_out, +wp.sender, +wp.LOG_ID, +wp.token_In, +wp.token_out, +wp.symbol_In, +wp.symbol_out, +wp.tx_to, +wp.amount_in * pIn.price as amount_in_usd, +wp.amount_out * pOut.price as amount_out_usd, +wp.ingested_at +from swap_without_prices wp +left join Harmony_prices pIn + on lower(token_In) = lower(pIn.harmony_address) + and date_trunc('hour',wp.block_timestamp) = pIn.hour +left join Harmony_prices pOut + on lower(token_out) = lower(pOut.harmony_address) + and date_trunc('hour',wp.block_timestamp) = pOut.hour --27,288,348 +where pool_address in (select token_address + from {{ ref('tokens') }} + where token_name = 'SushiSwap LP Token') {# keeps only swaps whose pool is a SushiSwap LP token; resolved through ref() so the lookup follows the active target and is tracked in lineage - NOTE(review): assumes the previously hard-coded MDAO_HARMONY.PROD.TOKENS is the prod build of the tokens model, confirm #} + + + + + + + + + + + + diff --git a/models/Sushi/ez_sushi_swaps.yml b/models/Sushi/ez_sushi_swaps.yml new file mode 100644 index 0000000..75028b2 --- /dev/null +++ b/models/Sushi/ez_sushi_swaps.yml @@ -0,0 +1,112 @@ +version: 2 +models: + - name: ez_sushi_swaps + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - LOG_ID + columns: + - name: BLOCK_NUMBER + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - NUMBER + - FLOAT + - name: BLOCK_TIMESTAMP + tests: + - not_null + - dbt_expectations.expect_row_values_to_have_recent_data: + datepart: day + interval: 1 + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - TIMESTAMP_NTZ + - name: TX_HASH + tests: + - not_null + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ + - name: POOL_ADDRESS + tests: + - not_null + -
dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ + + - name: AMOUNT_IN + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - NUMBER + - FLOAT + - name: AMOUNT_OUT + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - NUMBER + - FLOAT + - name: AMOUNT_IN_USD + tests: + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - NUMBER + - FLOAT + - name: AMOUNT_OUT_USD + tests: + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - NUMBER + - FLOAT + - name: TOKEN_IN + tests: + - not_null + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ + - name: TOKEN_OUT + tests: + - not_null + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ + - name: SYMBOL_IN + tests: + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - STRING + - VARCHAR + - name: SYMBOL_OUT + tests: + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - STRING + - VARCHAR + - name: SENDER + tests: + - not_null: + where: BLOCK_TIMESTAMP > '2021-08-01' + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ + - name: TX_TO + tests: + - not_null + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ + - name: PLATFORM + tests: + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - STRING + - VARCHAR + # This section is commented because at the moment we don't really have event indices in Harmony + # - name: EVENT_INDEX + # tests: + # - dbt_expectations.expect_column_values_to_be_in_type_list: + # column_type_list: + # - NUMBER + # - FLOAT + - name: LOG_ID + tests: + - not_null + diff --git a/models/core/blocks.yml b/models/core/blocks.yml index 1519d11..6803f05 100644 --- a/models/core/blocks.yml +++ 
b/models/core/blocks.yml @@ -3,8 +3,15 @@ version: 2 models: - name: blocks - description: |- + description: This table records all the blocks of Harmony blockchain. + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - block_id + - sequence_gaps: + column_name: block_id + where: BLOCK_TIMESTAMP < CURRENT_DATE - 1 columns: - name: block_id @@ -12,11 +19,20 @@ models: tests: - unique - not_null - + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - NUMBER + - FLOAT - name: block_timestamp description: The timestamp for when the block was collated. tests: - not_null + - dbt_expectations.expect_row_values_to_have_recent_data: + datepart: day + interval: 1 + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - TIMESTAMP_NTZ - name: ingested_at description: The timestamp for when the block was ingested. @@ -32,8 +48,9 @@ models: - name: block_parent_hash description: Hash of the parent block (32 Bytes). tests: - - unique - not_null + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ - name: gas_limit description: The maximum gas allowed in this block. @@ -49,11 +66,15 @@ models: description: The address of the beneficiary to whom the mining rewards were given. tests: - not_null + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ - name: nonce description: Hash of the generated proof-of-work (8 Bytes). tests: - not_null + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ - name: size description: Integer the size of this block in bytes. 
diff --git a/models/core/liquidity_pools.yml b/models/core/liquidity_pools.yml index cac86d9..baa554d 100644 --- a/models/core/liquidity_pools.yml +++ b/models/core/liquidity_pools.yml @@ -3,7 +3,11 @@ version: 2 models: - name: liquidity_pools - description: "Harmony Liquidity Pools" + description: "Harmony Liquidity Pools" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - POOL_ADDRESS columns: - name: pool_address diff --git a/models/core/logs.sql b/models/core/logs.sql index fdc3473..cc4d947 100644 --- a/models/core/logs.sql +++ b/models/core/logs.sql @@ -32,7 +32,7 @@ logs as ( block_timestamp, ingested_at, tx_hash, - value:logIndex::string as event_index, + to_number(Right(value:logIndex::string,length(value:logIndex::string)-2), 'xxxxx') as event_index, value:bech32_address::string as native_contract_address, value:address::string as evm_contract_address, value:decoded:contractName::string as contract_name, diff --git a/models/core/logs.yml b/models/core/logs.yml index bfcc94a..4f979f9 100644 --- a/models/core/logs.yml +++ b/models/core/logs.yml @@ -4,7 +4,16 @@ version: 2 models: - name: logs description: "Harmony Logs" - + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - LOG_ID + - sequence_gaps: + partition_by: + - BLOCK_ID + - TX_HASH + column_name: EVENT_INDEX + where: BLOCK_TIMESTAMP < CURRENT_DATE columns: - name: log_id description: Log identifier composed of tx_hash-event_index @@ -16,11 +25,20 @@ models: description: The block number. tests: - not_null + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - NUMBER - name: block_timestamp description: The timestamp for when the block was collated. 
tests: - not_null + - dbt_expectations.expect_row_values_to_have_recent_data: + datepart: day + interval: 1 + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - TIMESTAMP_NTZ - name: ingested_at description: The timestamp for when the block was ingested. @@ -31,6 +49,8 @@ models: description: Hash of the transaction (32 Bytes). tests: - not_null + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ - name: event_index description: Integer of of the log position in the block. @@ -46,6 +66,8 @@ models: description: EVM address of the contract. tests: - not_null + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ - name: contract_name description: Name of the contract called. @@ -54,10 +76,17 @@ models: - name: event_name description: Name of the event executed. tests: + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - STRING + - VARCHAR - name: event_inputs description: Parameter inputs for the event called. tests: + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - OBJECT - name: topics description: Array of 0 to 4 32 Bytes data of indexed log arguments. 
diff --git a/models/core/swaps.sql b/models/core/swaps.sql index 7575863..647bcf6 100644 --- a/models/core/swaps.sql +++ b/models/core/swaps.sql @@ -20,10 +20,12 @@ logs as ( final as ( select l.block_timestamp, + l.block_id, l.ingested_at, l.log_id, l.tx_hash, l.evm_contract_address as pool_address, + l.event_index, p.token0 as token0_address, t0.token_name as token0_name, t0.token_symbol as token0_symbol, diff --git a/models/core/swaps.yml b/models/core/swaps.yml index 8698f87..a89770a 100644 --- a/models/core/swaps.yml +++ b/models/core/swaps.yml @@ -4,12 +4,22 @@ version: 2 models: - name: swaps description: "Harmony Swaps" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - LOG_ID columns: - name: block_timestamp description: When the trade was made. tests: - not_null + - dbt_expectations.expect_row_values_to_have_recent_data: + datepart: day + interval: 1 + - dbt_expectations.expect_column_values_to_be_in_type_list: + column_type_list: + - TIMESTAMP_NTZ - name: ingested_at description: The timestamp for when the block was ingested. @@ -26,7 +36,8 @@ models: description: The transaction hash. tests: - not_null - + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ - name: pool_address description: The pool address that the swap was conducted in. tests: @@ -35,6 +46,8 @@ models: - name: token0_address description: The `token0` address. tests: + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ - name: token0_name description: The `token0` name. @@ -42,12 +55,9 @@ models: - name: token0_symbol description: The `token0` symbol. - tests: - name: amount0In description: The amount of `token0` that went into the swap. - tests: - - not_null - name: amount0Out description: The amount of `token0` that came out from the swap. @@ -57,6 +67,8 @@ models: - name: token1_address description: The `token1` address. 
tests: + - dbt_expectations.expect_column_values_to_match_regex: + regex: 0[xX][0-9a-fA-F]+ - name: token1_name description: The `token1` name. @@ -64,7 +76,6 @@ models: - name: token1_symbol description: The `token1` symbol. - tests: - name: amount1In description: The amount of `token1` that went into the swap. @@ -76,6 +87,7 @@ models: tests: - not_null + - name: from_address description: The address that triggered the swap. tests: diff --git a/models/core/txs.yml b/models/core/txs.yml index 665e432..cd314a9 100644 --- a/models/core/txs.yml +++ b/models/core/txs.yml @@ -12,7 +12,10 @@ models: description: The time when the block was mined. tests: - not_null - + - dbt_expectations.expect_row_values_to_have_recent_data: + datepart: day + interval: 1 + - name: ingested_at description: The timestamp for when the block was ingested. tests: diff --git a/models/sources.yml b/models/sources.yml index 92a2012..a3430f3 100644 --- a/models/sources.yml +++ b/models/sources.yml @@ -95,3 +95,13 @@ sources: description: Market Cap of the coin in USD. - name: timestamp description: When the data was ingested. 
+ - name: ethereum_db_sushi + database: ethereum + schema: CORE + tables: + - name: FACT_HOURLY_TOKEN_PRICES + - name: symbols_cross_tab + database: FLIPSIDE_PROD_DB + schema: SILVER + tables: + - name: MARKET_ASSET_METADATA \ No newline at end of file diff --git a/packages.yml b/packages.yml new file mode 100644 index 0000000..37433dd --- /dev/null +++ b/packages.yml @@ -0,0 +1,5 @@ +packages: + - package: calogica/dbt_expectations + version: [">=0.4.0", "<0.9.0"] + - package: dbt-labs/dbt_external_tables + version: 0.8.0 \ No newline at end of file diff --git a/profiles.yml b/profiles.yml index cfb196e..22222f9 100644 --- a/profiles.yml +++ b/profiles.yml @@ -1,4 +1,4 @@ -default: +harmony: target: dev outputs: dev: @@ -16,4 +16,4 @@ default: client_session_keep_alive: False query_tag: harmony config: - send_anonymous_usage_stats: False \ No newline at end of file + send_anonymous_usage_stats: False diff --git a/tests/tx_gap.sql b/tests/tx_gap.sql new file mode 100644 index 0000000..fc1f5fa --- /dev/null +++ b/tests/tx_gap.sql @@ -0,0 +1,2 @@ +-- depends_on: {{ ref('blocks') }} +{{ tx_gaps(ref("txs")) }}