Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
breakoutModewide
languagesql
with 
customer_bulk_rows as (
    -- everythign is grouped by hashed email to count as a unique customer
    select billing.email_hash,
    -- initial items on the order (except kit components)
    ARRAY(
        select as struct item.merchant_item_id from UNNEST(o1.items) item where kit_component is false
    ) as initial_item_ids,
    -- advertising sources
    (
    select value from UNNEST(properties) where name = 'ucasource'
    ) as utm_source,
    (
    select value from UNNEST(properties) where name = 'ucacampaign'
    ) as utm_campaign,
    affiliates[SAFE_OFFSET(0)].affiliate_oid,
    -- core values about each order that we want in an array.  
    ARRAY(
        select as struct order_id, summary.total.value as total_revenue, creation_dts, cast(creation_dts as date) as creation_date
        from `my-data-warehouse.my_dataset.uc_orders` o2 where o2.billing.email_hash = o1.billing.email_hash
    ) as order_array,
    -- clever use of run numbering which will be filtered in the next query to prevent correlation error
    ROW_NUMBER() OVER ( PARTITION BY billing.email_hash ORDER BY creation_dts ) AS rownum,
    from `my-data-warehouse.my_dataset.uc_orders` o1
    where billing.email_hash is not null
    order by billing.email_hash
),
customer_rows as (
    select *,
    -- calculate their cohort startin group
    (
        select DATE_TRUNC(min(creation_date), MONTH) from UNNEST(order_array)
    ) as cohort_group,
    -- calculate the last period they had any activity
    (
        select DATE_TRUNC(max(creation_date), MONTH) from UNNEST(order_array)
    ) as end_period_start_of_month
    from customer_bulk_rows 
    -- clever filtering to the first row number
    where rownum = 1
),
monthly_date_range_rows as (
  -- generate monthly periods since our first order
  SELECT first_of_month, DATE_SUB(DATE_ADD(first_of_month, INTERVAL 1 MONTH), INTERVAL 1 DAY) as end_of_month
  FROM UNNEST(
    GENERATE_DATE_ARRAY(DATE_TRUNC((select cast(min(creation_dts) as date) from `my-data-warehouse.my_dataset.uc_orders`), MONTH), CURRENT_DATE(), INTERVAL 1 MONTH)
  ) as first_of_month
),
customer_cohort_intermediate_rows as (
    -- grab the main data, but strip off columns we don't need to output
    select * except (order_array, end_period_start_of_month, rownum),
    -- roll up LTV across all the orders
    (
        select sum(total_revenue) from UNNEST(order_array)
    ) as ltv,
    -- generate the periods
    array (
        select as struct 
        -- nice incremently number by period
        ROW_NUMBER() OVER ( PARTITION BY email_hash ) AS period_number,
        -- date range of the period
        period.first_of_month, period.end_of_month, 
        -- number of orders in the period
        count(distinct(order_id)) as period_order_count,
        -- revenue in the period
        coalesce(sum(total_revenue), 0) as period_total_revenue,
        -- array of orders that occurred during this period
        (
            SELECT ARRAY_AGG(t)
            FROM (SELECT DISTINCT * FROM UNNEST(order_array) v where v.creation_date between period.first_of_month and period.end_of_month) t        
        ) as orders   
        from UNNEST(order_array) o
        RIGHT OUTER JOIN monthly_date_range_rows period on o.creation_dts between period.first_of_month and period.end_of_month 
        -- only look at periods that are between their first and last order
        where period.first_of_month >= cohort_group and period.first_of_month <= end_period_start_of_month
        group by period.first_of_month, period.end_of_month
    ) as periods
    from customer_rows
    order by cohort_group, email_hash
),
customer_cohort_rows as (
  select *,
  -- add in a count of the number of total periods
  ARRAY_LENGTH(periods) as period_count
  from customer_cohort_intermediate_rows
), 
customer_cohort_agg_rows as (
  select cohort_group, p.period_number, sum(p.period_total_revenue) as total_revenue
  from customer_cohort_rows 
  CROSS JOIN UNNEST(periods) p
  -- TODO: This is where you would filter down customers based upon the traffic source, affiliate id or initial item id
  -- where utm_source = 'google'  
  group by cohort_group, p.period_number
  order by cohort_group, p.period_number
)
-- Output a pivoted result of the revenue for each cohort group over the first 12 months for those customers
select * 
from customer_cohort_agg_rows
PIVOT(sum(total_revenue) as period_total_revenue FOR period_number IN (
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
))
-- Start from a particular month and go forward
where cohort_group >= cast('2021-01-01' as date)
order by cohort_group

Auto Order Cohort Revenue

Code Block
breakoutModewide
languagesql
with 
customer_bulk_rows as (
    -- everythign is grouped by hashed email to count as a unique customer
    select auto_order_oid as customer_key,
    -- initial items on the order (except kit components)
    ARRAY(
        select as struct item.original_item_id from UNNEST(ao1.items) item
    ) as initial_item_ids,
    -- advertising sources
    (
    select value from UNNEST(ao1.original_order.properties) where name = 'ucasource'
    ) as utm_source,
    (
    select value from UNNEST(ao1.original_order.properties) where name = 'ucacampaign'
    ) as utm_campaign,
    ao1.original_order.affiliates[SAFE_OFFSET(0)].affiliate_oid,
    -- core values about each order that we want in an array.  
    ARRAY(
        select as struct ao1.original_order.order_id, ao1.original_order.summary.total.value as total_revenue, ao1.original_order.creation_dts, cast(ao1.original_order.creation_dts as date) as creation_date
        union all 
        select as struct order_id, summary.total.value as total_revenue, creation_dts, cast(creation_dts as date) as creation_date
        from ao1.rebill_orders
    ) as order_array,
    -- clever use of run numbering which will be filtered in the next query to prevent correlation error
    ROW_NUMBER() OVER ( PARTITION BY ao1.auto_order_oid ) AS rownum,
    from `my-data-warehouse.my_dataset.uc_auto_orders` ao1
    order by auto_order_oid
),
customer_rows as (
    select *,
    -- calculate their cohort startin group
    (
        select DATE_TRUNC(min(creation_date), MONTH) from UNNEST(order_array)
    ) as cohort_group,
    -- calculate the last period they had any activity
    (
        select DATE_TRUNC(max(creation_date), MONTH) from UNNEST(order_array)
    ) as end_period_start_of_month
    from customer_bulk_rows 
    -- clever filtering to the first row number
    where rownum = 1
),
monthly_date_range_rows as (
  -- generate monthly periods since our first order
  SELECT first_of_month, DATE_SUB(DATE_ADD(first_of_month, INTERVAL 1 MONTH), INTERVAL 1 DAY) as end_of_month
  FROM UNNEST(
    GENERATE_DATE_ARRAY(DATE_TRUNC((select cast(min(original_order.creation_dts) as date) from `my-data-warehouse.my_dataset.uc_auto_orders`), MONTH), CURRENT_DATE(), INTERVAL 1 MONTH)
  ) as first_of_month
),
customer_cohort_intermediate_rows as (
    -- grab the main data, but strip off columns we don't need to output
    select * except (order_array, end_period_start_of_month, rownum),
    -- roll up LTV across all the orders
    (
        select sum(total_revenue) from UNNEST(order_array)
    ) as ltv,
    -- generate the periods
    array (
        select as struct 
        -- nice incremently number by period
        ROW_NUMBER() OVER ( PARTITION BY customer_key ) AS period_number,
        -- date range of the period
        period.first_of_month, period.end_of_month, 
        -- number of orders in the period
        count(distinct(order_id)) as period_order_count,
        -- revenue in the period
        coalesce(sum(total_revenue), 0) as period_total_revenue,
        -- array of orders that occurred during this period
        (
            SELECT ARRAY_AGG(t)
            FROM (SELECT DISTINCT * FROM UNNEST(order_array) v where v.creation_date between period.first_of_month and period.end_of_month) t        
        ) as orders   
        from UNNEST(order_array) o
        RIGHT OUTER JOIN monthly_date_range_rows period on o.creation_dts between period.first_of_month and period.end_of_month 
        -- only look at periods that are between their first and last order
        where period.first_of_month >= cohort_group and period.first_of_month <= end_period_start_of_month
        group by period.first_of_month, period.end_of_month
    ) as periods
    from customer_rows
    order by cohort_group, customer_key
),
customer_cohort_rows as (
  select *,
  -- add in a count of the number of total periods
  ARRAY_LENGTH(periods) as period_count
  from customer_cohort_intermediate_rows
), 
customer_cohort_agg_rows as (
  select cohort_group, p.period_number, sum(p.period_total_revenue) as total_revenue
  from customer_cohort_rows 
  CROSS JOIN UNNEST(periods) p
  -- TODO: This is where you would filter down customers based upon the traffic source, affiliate id or initial item id
  -- where utm_source = 'google'  
  group by cohort_group, p.period_number
  order by cohort_group, p.period_number
)
-- Output a pivoted result of the revenue for each cohort group over the first 12 months for those customers
select * 
from customer_cohort_agg_rows
PIVOT(sum(total_revenue) as period_total_revenue FOR period_number IN (
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
))
-- Start from a particular month and go forward
where cohort_group >= cast('2021-01-01' as date)
order by cohort_group

Related Documentation

https://towardsdatascience.com/how-to-monitor-repeat-purchase-behavior-using-google-data-studio-and-bigquery-c2b5adbe8ebb