with
customer_bulk_rows as (
-- everything is grouped by hashed email to count as a unique customer
select auto_order_oid as customer_key,
-- initial items on the order (except kit components)
ARRAY(
select as struct item.original_item_id from UNNEST(ao1.items) item
) as initial_item_ids,
-- advertising sources
(
select value from UNNEST(ao1.original_order.properties) where name = 'ucasource'
) as utm_source,
(
select value from UNNEST(ao1.original_order.properties) where name = 'ucacampaign'
) as utm_campaign,
ao1.original_order.affiliates[SAFE_OFFSET(0)].affiliate_oid,
-- is the auto order still active
DATE_TRUNC(coalesce(ao1.disabled_dts, ao1.canceled_dts), day) as churn_date,
-- core values about each order that we want in an array.
ARRAY(
select as struct ao1.original_order.order_id, ao1.original_order.summary.total.value as total_revenue, ao1.original_order.creation_dts, cast(ao1.original_order.creation_dts as date) as creation_date
union all
select as struct order_id, summary.total.value as total_revenue, creation_dts, cast(creation_dts as date) as creation_date
from ao1.rebill_orders
) as order_array,
-- clever use of run numbering which will be filtered in the next query to prevent correlation error
ROW_NUMBER() OVER ( PARTITION BY ao1.auto_order_oid ) AS rownum,
from `my-data-warehouse.my_dataset.uc_auto_orders` ao1
order by auto_order_oid
),
customer_rows as (
select *,
-- calculate their cohort startin group
(
select DATE_TRUNC(min(creation_date), MONTH) from UNNEST(order_array)
) as cohort_group,
-- calculate the last period they had any activity
DATE_TRUNC(CURRENT_DATE(), MONTH) as end_period_start_of_month
from customer_bulk_rows
-- clever filtering to the first row number
where rownum = 1
),
monthly_date_range_rows as (
-- generate monthly periods since our first order
SELECT first_of_month, DATE_SUB(DATE_ADD(first_of_month, INTERVAL 1 MONTH), INTERVAL 1 DAY) as end_of_month
FROM UNNEST(
GENERATE_DATE_ARRAY(DATE_TRUNC((select cast(min(original_order.creation_dts) as date) from `my-data-warehouse.my_dataset.uc_auto_orders`), MONTH), CURRENT_DATE(), INTERVAL 1 MONTH)
) as first_of_month
),
customer_cohort_intermediate_rows as (
-- grab the main data, but strip off columns we don't need to output
select * except (order_array, end_period_start_of_month, rownum),
-- roll up LTV across all the orders
(
select sum(total_revenue) from UNNEST(order_array)
) as ltv,
-- generate the periods
array (
select as struct
-- nice incremently number by period
ROW_NUMBER() OVER ( PARTITION BY customer_key ) AS period_number,
-- date range of the period
period.first_of_month, period.end_of_month,
-- number of orders in the period
count(distinct(order_id)) as period_order_count,
-- revenue in the period
coalesce(sum(total_revenue), 0) as period_total_revenue,
-- active still during this period
case when churn_date is null then 1
when churn_date < period.first_of_month then 0
when churn_date >= period.first_of_month then 1
else 0
end as active_during_period,
-- array of orders that occurred during this period
(
SELECT ARRAY_AGG(t)
FROM (SELECT DISTINCT * FROM UNNEST(order_array) v where v.creation_date between period.first_of_month and period.end_of_month) t
) as orders
from UNNEST(order_array) o
RIGHT OUTER JOIN monthly_date_range_rows period on o.creation_dts between period.first_of_month and period.end_of_month
-- only look at periods that are between their first and last order
where period.first_of_month >= cohort_group and period.first_of_month <= end_period_start_of_month
group by period.first_of_month, period.end_of_month
) as periods
from customer_rows
order by cohort_group, customer_key
),
customer_cohort_rows as (
select *,
-- add in a count of the number of total periods
ARRAY_LENGTH(periods) as period_count
from customer_cohort_intermediate_rows
),
customer_cohort_agg_rows as (
select cohort_group, p.period_number, sum(p.active_during_period) as active_auto_order_count
from customer_cohort_rows
CROSS JOIN UNNEST(periods) p
-- TODO: This is where you would filter down customers based upon the traffic source, affiliate id or initial item id
-- where utm_source = 'google'
group by cohort_group, p.period_number
order by cohort_group, p.period_number
),
customer_cohort_agg_rows2 as (
select cohort_group, period_number, ROUND(SAFE_DIVIDE(active_auto_order_count,
(
select max(active_auto_order_count) from customer_cohort_agg_rows ccar2 where ccar2.cohort_group = ccar1.cohort_group
)) * 100.0, 1) as active_percentage
from customer_cohort_agg_rows ccar1
order by cohort_group, period_number
),
period_rows as (
-- Output a pivoted result of the revenue for each cohort group over the first 24 months for those auto order cohorts
select *
from customer_cohort_agg_rows2
PIVOT(sum(active_percentage) as period_active_auto_order_percentage FOR period_number IN (
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
))
-- Start from a particular month and go forward
where cohort_group >= cast('2021-01-01' as date)
order by cohort_group
)
select * from period_rows
|