Code Block

breakoutMode	wide
language	sql

with 
customer_bulk_rows as (
    -- everythign is grouped by hashed email to count as a unique customer
    select billing.email_hash,
    -- initial items on the order (except kit components)
    ARRAY(
        select as struct item.merchant_item_id from UNNEST(o1.items) item where kit_component is false
    ) as initial_item_ids,
    -- advertising sources
    (
    select value from UNNEST(properties) where name = 'ucasource'
    ) as utm_source,
    (
    select value from UNNEST(properties) where name = 'ucacampaign'
    ) as utm_campaign,
    affiliates[SAFE_OFFSET(0)].affiliate_oid,
    -- core values about each order that we want in an array.  
    ARRAY(
        select as struct order_id, summary.total.value as total_revenue, creation_dts, cast(creation_dts as date) as creation_date
        from `my-data-warehouse.my_dataset.uc_orders` o2 where o2.billing.email_hash = o1.billing.email_hash
    ) as order_array,
    -- clever use of run numbering which will be filtered in the next query to prevent correlation error
    ROW_NUMBER() OVER ( PARTITION BY billing.email_hash ORDER BY creation_dts ) AS rownum,
    from `my-data-warehouse.my_dataset.uc_orders` o1
    where billing.email_hash is not null
    order by billing.email_hash
),
customer_rows as (
    select *,
    -- calculate their cohort startin group
    (
        select DATE_TRUNC(min(creation_date), MONTH) from UNNEST(order_array)
    ) as cohort_group,
    -- calculate the last period they had any activity
    (
        select DATE_TRUNC(max(creation_date), MONTH) from UNNEST(order_array)
    ) as end_period_start_of_month
    from customer_bulk_rows 
    -- clever filtering to the first row number
    where rownum = 1
),
monthly_date_range_rows as (
  -- generate monthly periods since our first order
  SELECT first_of_month, DATE_SUB(DATE_ADD(first_of_month, INTERVAL 1 MONTH), INTERVAL 1 DAY) as end_of_month
  FROM UNNEST(
    GENERATE_DATE_ARRAY(DATE_TRUNC((select cast(min(creation_dts) as date) from `my-data-warehouse.my_dataset.uc_orders`), MONTH), CURRENT_DATE(), INTERVAL 1 MONTH)
  ) as first_of_month
),
customer_cohort_intermediate_rows as (
    -- grab the main data, but strip off columns we don't need to output
    select * except (order_array, end_period_start_of_month, rownum),
    -- roll up LTV across all the orders
    (
        select sum(total_revenue) from UNNEST(order_array)
    ) as ltv,
    -- generate the periods
    array (
        select as struct 
        -- nice incremently number by period
        ROW_NUMBER() OVER ( PARTITION BY email_hash ) AS period_number,
        -- date range of the period
        period.first_of_month, period.end_of_month, 
        -- number of orders in the period
        count(distinct(order_id)) as period_order_count,
        -- revenue in the period
        coalesce(sum(total_revenue), 0) as period_total_revenue,
        -- array of orders that occurred during this period
        (
            SELECT ARRAY_AGG(t)
            FROM (SELECT DISTINCT * FROM UNNEST(order_array) v where v.creation_date between period.first_of_month and period.end_of_month) t        
        ) as orders   
        from UNNEST(order_array) o
        RIGHT OUTER JOIN monthly_date_range_rows period on o.creation_dts between period.first_of_month and period.end_of_month 
        -- only look at periods that are between their first and last order
        where period.first_of_month >= cohort_group and period.first_of_month <= end_period_start_of_month
        group by period.first_of_month, period.end_of_month
    ) as periods
    from customer_rows
    order by cohort_group, email_hash
),
customer_cohort_rows as (
  select *,
  -- add in a count of the number of total periods
  ARRAY_LENGTH(periods) as period_count
  from customer_cohort_intermediate_rows
), 
customer_cohort_agg_rows as (
  select cohort_group, p.period_number, sum(p.period_total_revenue) as total_revenue
  from customer_cohort_rows 
  CROSS JOIN UNNEST(periods) p
  -- TODO: This is where you would filter down customers based upon the traffic source, affiliate id or initial item id
  -- where utm_source = 'google'  
  group by cohort_group, p.period_number
  order by cohort_group, p.period_number
)
-- Output a pivoted result of the revenue for each cohort group over the first 12 months for those customers
select * 
from customer_cohort_agg_rows
PIVOT(sum(total_revenue) as period_total_revenue FOR period_number IN (
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
))
-- Start from a particular month and go forward
where cohort_group >= cast('2021-01-01' as date)
order by cohort_group

Auto Order Cohort Revenue

Code Block

breakoutMode	wide
language	sql

with 
customer_bulk_rows as (
    -- everythign is grouped by hashed email to count as a unique customer
    select auto_order_oid as customer_key,
    -- initial items on the order (except kit components)
    ARRAY(
        select as struct item.original_item_id from UNNEST(ao1.items) item
    ) as initial_item_ids,
    -- advertising sources
    (
    select value from UNNEST(ao1.original_order.properties) where name = 'ucasource'
    ) as utm_source,
    (
    select value from UNNEST(ao1.original_order.properties) where name = 'ucacampaign'
    ) as utm_campaign,
    ao1.original_order.affiliates[SAFE_OFFSET(0)].affiliate_oid,
    -- core values about each order that we want in an array.  
    ARRAY(
        select as struct ao1.original_order.order_id, ao1.original_order.summary.total.value as total_revenue, ao1.original_order.creation_dts, cast(ao1.original_order.creation_dts as date) as creation_date
        union all 
        select as struct order_id, summary.total.value as total_revenue, creation_dts, cast(creation_dts as date) as creation_date
        from ao1.rebill_orders
    ) as order_array,
    -- clever use of run numbering which will be filtered in the next query to prevent correlation error
    ROW_NUMBER() OVER ( PARTITION BY ao1.auto_order_oid ) AS rownum,
    from `my-data-warehouse.my_dataset.uc_auto_orders` ao1
    order by auto_order_oid
),
customer_rows as (
    select *,
    -- calculate their cohort startin group
    (
        select DATE_TRUNC(min(creation_date), MONTH) from UNNEST(order_array)
    ) as cohort_group,
    -- calculate the last period they had any activity
    (
        select DATE_TRUNC(max(creation_date), MONTH) from UNNEST(order_array)
    ) as end_period_start_of_month
    from customer_bulk_rows 
    -- clever filtering to the first row number
    where rownum = 1
),
monthly_date_range_rows as (
  -- generate monthly periods since our first order
  SELECT first_of_month, DATE_SUB(DATE_ADD(first_of_month, INTERVAL 1 MONTH), INTERVAL 1 DAY) as end_of_month
  FROM UNNEST(
    GENERATE_DATE_ARRAY(DATE_TRUNC((select cast(min(original_order.creation_dts) as date) from `my-data-warehouse.my_dataset.uc_auto_orders`), MONTH), CURRENT_DATE(), INTERVAL 1 MONTH)
  ) as first_of_month
),
customer_cohort_intermediate_rows as (
    -- grab the main data, but strip off columns we don't need to output
    select * except (order_array, end_period_start_of_month, rownum),
    -- roll up LTV across all the orders
    (
        select sum(total_revenue) from UNNEST(order_array)
    ) as ltv,
    -- generate the periods
    array (
        select as struct 
        -- nice incremently number by period
        ROW_NUMBER() OVER ( PARTITION BY customer_key ) AS period_number,
        -- date range of the period
        period.first_of_month, period.end_of_month, 
        -- number of orders in the period
        count(distinct(order_id)) as period_order_count,
        -- revenue in the period
        coalesce(sum(total_revenue), 0) as period_total_revenue,
        -- array of orders that occurred during this period
        (
            SELECT ARRAY_AGG(t)
            FROM (SELECT DISTINCT * FROM UNNEST(order_array) v where v.creation_date between period.first_of_month and period.end_of_month) t        
        ) as orders   
        from UNNEST(order_array) o
        RIGHT OUTER JOIN monthly_date_range_rows period on o.creation_dts between period.first_of_month and period.end_of_month 
        -- only look at periods that are between their first and last order
        where period.first_of_month >= cohort_group and period.first_of_month <= end_period_start_of_month
        group by period.first_of_month, period.end_of_month
    ) as periods
    from customer_rows
    order by cohort_group, customer_key
),
customer_cohort_rows as (
  select *,
  -- add in a count of the number of total periods
  ARRAY_LENGTH(periods) as period_count
  from customer_cohort_intermediate_rows
), 
customer_cohort_agg_rows as (
  select cohort_group, p.period_number, sum(p.period_total_revenue) as total_revenue
  from customer_cohort_rows 
  CROSS JOIN UNNEST(periods) p
  -- TODO: This is where you would filter down customers based upon the traffic source, affiliate id or initial item id
  -- where utm_source = 'google'  
  group by cohort_group, p.period_number
  order by cohort_group, p.period_number
)
-- Output a pivoted result of the revenue for each cohort group over the first 12 months for those customers
select * 
from customer_cohort_agg_rows
PIVOT(sum(total_revenue) as period_total_revenue FOR period_number IN (
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
))
-- Start from a particular month and go forward
where cohort_group >= cast('2021-01-01' as date)
order by cohort_group

Versions Compared

Old Version 46

New Version 47

Key

Auto Order Cohort Revenue

Related Documentation

Page Comparison

Versions Compared

Old Version 46

New Version 47

Key

Auto Order Cohort Revenue

Related Documentation