Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

version auto upgrade #18

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions models/localMacros.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Project-local SQL macros. Each macro lists its inputs and a template
# in `value` that branches on whether `end_time` is nil.
macros:
  # Day difference between `column` and a reference date: the formatted
  # `end_time` when it is not nil, otherwise GETDATE().
  # NOTE(review): only this macro's else-branch casts the column with
  # `::timestamp`; macro_datediff_n below does not - confirm that is intended.
  - name: macro_datediff
    inputs:
      - column
    value: >-
      {% if !(end_time|isnil) %}
      datediff(day, date({{column}}), date('{{end_time.Format("2006-01-02 15:04:05")}}'))
      {% else %}
      datediff(day, date({{column}}::timestamp), GETDATE())
      {% endif %}
  # Same day difference as above, compared with `<= number_of_days`, so the
  # template renders a boolean condition instead of a number.
  - name: macro_datediff_n
    inputs:
      - column
      - number_of_days
    value: >-
      {% if !(end_time|isnil) %}
      datediff(day, date({{column}}), date('{{end_time.Format("2006-01-02 15:04:05")}}')) <={{number_of_days}}
      {% else %}
      datediff(day, date({{column}}), GETDATE()) <= {{number_of_days}}
      {% endif %}
29 changes: 6 additions & 23 deletions models/profiles-ml.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ models:
entity_key: user
validity_time: 24h # 1 day
py_repo_url: [email protected]:rudderlabs/rudderstack-profiles-classifier.git

train:
file_extension: .json
file_validity: 168h # If the last trained model is older than this, then the model will be trained again,
Expand All @@ -21,16 +20,13 @@ models:
prediction_horizon_days: 7 # Number of days in future for which we want to predict
features_profiles_model: 'rudder_user_base_features' # Model name
output_profiles_ml_model: *model_name_7_days # Name of output model based on current model to distinguish between multiple models
eligible_users:
eligible_users:
inputs: *inputs_7_days


preprocessing: &model_prep_configs_7_days
ignore_features:
- user_email
- first_name
- last_name

predict:
inputs:
- models/rudder_user_base_features
Expand All @@ -45,17 +41,14 @@ models:
features:
- name: *percentile_name_7_days
description: 'Percentile of churn score. Higher the percentile, higher the probability of churn'

<<: *feature_meta_data_7_days

!!merge <<: *feature_meta_data_7_days
- name: &model_name_30_days churn_30_days_model
model_type: python_model
model_spec:
occurred_at_col: insert_ts
entity_key: user
validity_time: 24h # 1 day
py_repo_url: [email protected]:rudderlabs/rudderstack-profiles-classifier.git

train:
file_extension: .json
file_validity: 168h # If the last trained model is older than this, then the model will be trained again,
Expand All @@ -69,16 +62,13 @@ models:
prediction_horizon_days: 30 # Number of days in future for which we want to predict
features_profiles_model: 'rudder_user_base_features' # Model name
output_profiles_ml_model: *model_name_30_days # Name of output model based on current model to distinguish between multiple models
eligible_users:
eligible_users:
inputs: *inputs_30_days


preprocessing: &model_prep_configs_30_days
ignore_features:
- user_email
- first_name
- last_name

predict:
inputs:
- models/rudder_user_base_features
Expand All @@ -93,17 +83,14 @@ models:
features:
- name: *percentile_name_30_days
description: 'Percentile of churn score. Higher the percentile, higher the probability of churn'

<<: *feature_meta_data_30_days

!!merge <<: *feature_meta_data_30_days
- name: &model_name_90_days churn_90_days_model
model_type: python_model
model_spec:
occurred_at_col: insert_ts
entity_key: user
validity_time: 24h # 1 day
py_repo_url: [email protected]:rudderlabs/rudderstack-profiles-classifier.git

train:
file_extension: .json
file_validity: 168h # If the last trained model is older than this, then the model will be trained again,
Expand All @@ -117,16 +104,13 @@ models:
prediction_horizon_days: 90 # Number of days in future for which we want to predict
features_profiles_model: 'rudder_user_base_features' # Model name
output_profiles_ml_model: *model_name_90_days # Name of output model based on current model to distinguish between multiple models
eligible_users:
eligible_users:
inputs: *inputs_90_days


preprocessing: &model_prep_configs_90_days
ignore_features:
- user_email
- first_name
- last_name

predict:
inputs:
- models/rudder_user_base_features
Expand All @@ -141,5 +125,4 @@ models:
features:
- name: *percentile_name_90_days
description: 'Percentile of churn score. Higher the percentile, higher the probability of churn'

<<: *feature_meta_data_90_days
!!merge <<: *feature_meta_data_90_days
59 changes: 24 additions & 35 deletions models/profiles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,12 @@ models:
- name: context_campaign_source
- name: context_campaign_medium
- name: timestamp
- name: user_main_id
- name: user_main_id
- name: rudder_user_base_features
model_type: feature_table_model
model_spec:
validity_time: 24h # 1 day
entity_key: user
macros:
- name: macro_datediff
inputs:
- column
value: "{% if !(end_time|isnil) %} datediff(day, date({{column}}), date('{{end_time.Format(\"2006-01-02 15:04:05\")}}')) {% else %} datediff(day, date({{column}}::timestamp), GETDATE()) {% endif %}"
- name: macro_datediff_n
inputs:
- column
- number_of_days
value: "{% if !(end_time|isnil) %} datediff(day, date({{column}}), date('{{end_time.Format(\"2006-01-02 15:04:05\")}}')) <={{number_of_days}} {% else %} datediff(day, date({{column}}), GETDATE()) <= {{number_of_days}} {% endif %}"
vars:
- entity_var:
name: max_timestamp_bw_tracks_pages
Expand All @@ -67,27 +57,27 @@ models:
#days since last seen
- entity_var:
name: days_since_last_seen
select: "{{macro_datediff('max_timestamp_bw_tracks_pages')}}"
select: "{{macro_datediff('{{user.Var(\"max_timestamp_bw_tracks_pages\")}}')}}"
dependencies:
- max_timestamp_bw_tracks_pages
#Churn features
- entity_var:
name: is_churned_7_days
select: case when days_since_last_seen > 7 then 1 else 0 end
select: case when {{user.Var("days_since_last_seen")}} > 7 then 1 else 0 end
description: Depending on the n value, it specifies if there is any activity observed in the last 7 days.
dependencies:
- days_since_last_seen
- session_end_time
- entity_var:
name: is_churned_30_days
select: case when days_since_last_seen > 30 then 1 else 0 end
select: case when {{user.Var("days_since_last_seen")}} > 30 then 1 else 0 end
description: Depending on the n value, it specifies if there is any activity observed in the last 30 days.
dependencies:
- days_since_last_seen
- session_end_time
- entity_var:
name: is_churned_90_days
select: case when days_since_last_seen > 90 then 1 else 0 end
select: case when {{user.Var("days_since_last_seen")}} > 90 then 1 else 0 end
description: Depending on the n value, it specifies if there is any activity observed in the last 90 days.
dependencies:
- days_since_last_seen
Expand All @@ -99,11 +89,11 @@ models:
- entity_var:
name: state
from: inputs/rsIdentifies
select: first_value(state)
select: first_value({{user.Var("state")}})
window:
order_by:
- timestamp desc
where: state is not null and state!=''
where: '{{user.Var("state")}} is not null and {{user.Var("state")}}!='''''
- entity_var:
name: country
from: inputs/rsIdentifies
Expand All @@ -115,27 +105,27 @@ models:
- entity_var:
name: first_name
from: inputs/rsIdentifies
select: first_value(first_name)
select: first_value({{user.Var("first_name")}})
window:
order_by:
- timestamp desc
where: first_name is not null and first_name!=''
where: '{{user.Var("first_name")}} is not null and {{user.Var("first_name")}}!='''''
- entity_var:
name: last_name
from: inputs/rsIdentifies
select: first_value(last_name)
select: first_value({{user.Var("last_name")}})
window:
order_by:
- timestamp desc
where: last_name is not null and last_name!=''
where: '{{user.Var("last_name")}} is not null and {{user.Var("last_name")}}!='''''
- entity_var:
name: currency
from: inputs/rsIdentifies
select: first_value(currency)
select: first_value({{user.Var("currency")}})
window:
order_by:
- timestamp desc
where: currency is not null and currency!=''
where: '{{user.Var("currency")}} is not null and {{user.Var("currency")}}!='''''
dependencies:
- session_start_time
- entity_var:
Expand Down Expand Up @@ -195,29 +185,29 @@ models:
name: total_sessions_last_week
from: models/rsTracksUnionPages
select: count(distinct context_session_id)
where: " context_session_id is not null and {{macro_datediff('session_start_time')}} between 0 and 7"
where: " context_session_id is not null and {{macro_datediff('{{rsTracksUnionPages.Var(\"session_start_time\")}}')}} between 0 and 7"
description: total number of sessions over last 7 days.
dependencies:
- session_row_number
- entity_var:
name: total_sessions_90_days
from: models/rsTracksUnionPages
select: count(distinct context_session_id)
where: " context_session_id is not null and {{macro_datediff('session_start_time')}} between 0 and 90"
where: " context_session_id is not null and {{macro_datediff('{{rsTracksUnionPages.Var(\"session_start_time\")}}')}} between 0 and 90"
description: total number of sessions over last 90 days.
- entity_var:
name: total_sessions_365_days
from: models/rsTracksUnionPages
select: count(distinct context_session_id)
where: " context_session_id is not null and {{macro_datediff('session_start_time')}} between 0 and 365 "
where: " context_session_id is not null and {{macro_datediff('{{rsTracksUnionPages.Var(\"session_start_time\")}}')}} between 0 and 365 "
description: total number of sessions over last 365 days.
dependencies:
- session_start_time
- entity_var:
name: avg_session_length_in_sec_overall
from: models/rsTracksUnionPages
select: avg(datediff(second, session_start_time, session_end_time))
where: session_row_number = 1 and context_session_id is not null
select: avg(datediff(second, {{rsTracksUnionPages.Var("session_start_time")}}, {{rsTracksUnionPages.Var("session_end_time")}}))
where: '{{rsTracksUnionPages.Var("session_row_number")}} = 1 and context_session_id is not null'
description: Average session length (in seconds) of all the user sessions till date.
dependencies:
- session_row_number
Expand All @@ -226,8 +216,8 @@ models:
- entity_var:
name: avg_session_length_in_sec_last_week
from: models/rsTracksUnionPages
select: avg(datediff(second, session_start_time, session_end_time))
where: context_session_id is not null and session_row_number = 1 and {{macro_datediff('session_start_time')}} between 0 and 7
select: avg(datediff(second, {{rsTracksUnionPages.Var("session_start_time")}}, {{rsTracksUnionPages.Var("session_end_time")}}))
where: context_session_id is not null and {{rsTracksUnionPages.Var("session_row_number")}} = 1 and {{macro_datediff('{{rsTracksUnionPages.Var("session_start_time")}}')}} between 0 and 7
description: Average session length (in seconds) of all the user sessions that started in last 7 days
dependencies:
- session_start_time
Expand All @@ -236,8 +226,8 @@ models:
- entity_var:
name: avg_session_length_in_sec_365_days
from: models/rsTracksUnionPages
select: avg(datediff(second, session_start_time, session_end_time))
where: "context_session_id is not null and session_row_number = 1 and {{macro_datediff_n('session_start_time','365')}}"
select: avg(datediff(second, {{rsTracksUnionPages.Var("session_start_time")}}, {{rsTracksUnionPages.Var("session_end_time")}}))
where: "context_session_id is not null and {{rsTracksUnionPages.Var(\"session_row_number\")}} = 1 and {{macro_datediff_n('{{rsTracksUnionPages.Var(\"session_start_time\")}}','365')}}"
description: Average session length (in seconds) of all the user sessions that started in last 365 days
dependencies:
- session_row_number
Expand All @@ -246,14 +236,14 @@ models:
- entity_var:
name: first_seen_date
from: models/rsTracksUnionPages
select: min(date(session_start_time))
select: min(date({{rsTracksUnionPages.Var("session_start_time")}}))
description: The first date on which an event has been recorded by the user
dependencies:
- session_start_time
- entity_var:
name: last_seen_date
from: models/rsTracksUnionPages
select: max(date(session_end_time))
select: max(date({{rsTracksUnionPages.Var("session_end_time")}}))
description: The latest date on which an event has been recorded by the user
dependencies:
- session_end_time
Expand Down Expand Up @@ -340,4 +330,3 @@ models:
- campaigns_list
- mediums_list
- sources_list

1 change: 1 addition & 0 deletions original_project_folder
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you delete this file from the project?

2 changes: 1 addition & 1 deletion pb_project.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Project name
name: base_features
# Project's yaml schema version
schema_version: 42
schema_version: 44
# WH Connection Profile to use.
connection: dev_wh
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you name the connection as default? This is required for the python models to run properly from the UI.

# Whether to allow inputs having no timestamps,
Expand Down