
WITH
-- Reporting window [range_start, range_end) that drives the rest of the query.
-- Only one pair of bounds may be active at a time; the commented pairs below are
-- alternative windows kept for re-running the query against other days.
the_parameter_constants AS (
  SELECT
    CAST('2018-01-19' AS TIMESTAMP) AS range_start,
    CAST('2018-01-20' AS TIMESTAMP) AS range_end

    -- Rolling one-day window based on SYSDATE shifted back 7 hours:
    --   DATE_TRUNC('day', (SYSDATE - INTERVAL '7 hours')) - INTERVAL '1 day' AS range_start
    --   , DATE_TRUNC('day', (SYSDATE - INTERVAL '7 hours')) AS range_end

    -- Other fixed days:
    --   '2018-08-23'::TIMESTAMP AS range_start
    --   , '2018-08-24'::TIMESTAMP AS range_end

    --   '2018-06-26'::TIMESTAMP AS range_start
    --   , '2018-06-27'::TIMESTAMP AS range_end

    --   '2017-11-12'::TIMESTAMP AS range_start
    --   , '2017-11-13'::TIMESTAMP AS range_end

    -- Parameterised form (one day starting at $1):
    --   $1::TIMESTAMP AS range_start
    --   , DATEADD(day, 1, $1::TIMESTAMP) AS range_end
)
-- Time frame used when scanning the install/activate/anchor-changed/deactivate/
-- uninstall/delete logs.
--   start_date: fixed at 2017-05-01 so that all possible events are captured.
--   end_date:   every calendar date d with range_start <= d < range_end.
-- Downstream CTEs read this through scalar subqueries, so the window must resolve
-- to exactly one row (i.e. a one-day range).
, the_basetable_date AS (
  SELECT DISTINCT
    CAST('2017-05-01' AS TIMESTAMP) AS start_date,
    CAST("date" AS TIMESTAMP) AS end_date
  FROM metadata.date
  WHERE "date" >= (SELECT range_start FROM the_parameter_constants)
    AND "date" < (SELECT range_end FROM the_parameter_constants)
)
-- Distinct metadata rows for every extension version whose state is 'Released'.
-- Used later to infer the anchor from the is_*_enabled capability flags.
, all_released_extension_versions AS (
  SELECT DISTINCT
    extension_id,
    extension_version,
    extension_name,
    extension_anchor,
    is_component_enabled,
    is_overlay_enabled,
    is_panel_enabled,
    is_mobile_enabled,
    is_bits_enabled
  FROM analysis.extensions_metadata_full
  WHERE extension_state = 'Released'
)
-- , the_extensions_constants AS (
--   --SELECT 'fe5a78xkj1lnk9wefpk295p6x3dsvl' AS extension_id --stream avatar
--   --SELECT 'h7683cdyci11yov0tlac9f6lkvgtzi' AS extension_id --Spotify: Now Playing
--   SELECT 'lgpf9j7y8n1ja9onkb6w7bxfkhk2zl' AS extension_id --OP.gg for league of legends
--
-- )
-- The single channel this debugging run is restricted to.
-- Swap the active SELECT for one of the commented alternatives to inspect another channel.
, the_channel_constants AS (
  SELECT
    155528531 AS channel_id -- a_dot_burr
  -- SELECT 11805560 AS channel_id   -- streamer with op.gg weird data in jan
  -- SELECT 136563385 AS channel_id  -- streamer with multiple anchor for same extension starting 2018-06-25
)















  -- Raw install events for the selected channel inside [start_date, end_date),
  -- with exact duplicate rows removed. Both the event "time" and the "date"
  -- column are range-filtered.
  , install_logs_step0 AS (
    SELECT DISTINCT
      (SELECT CAST(start_date AS DATE) FROM the_basetable_date) AS start_date,
      (SELECT CAST(end_date AS DATE) FROM the_basetable_date) AS end_date,
      CAST("time" AS TIMESTAMP) AS "time",
      CAST(extension_id AS VARCHAR) AS extension_id,
      CAST(extension_version AS VARCHAR) AS extension_version,
      CAST(channel_id AS BIGINT) AS channel_id
    FROM tahoe.extension_install
    WHERE extension_id IS NOT NULL
      AND extension_version IS NOT NULL
      AND "time" >= (SELECT start_date FROM the_basetable_date)
      AND "time" < (SELECT end_date FROM the_basetable_date)
      AND "date" >= (SELECT start_date FROM the_basetable_date)
      AND "date" < (SELECT end_date FROM the_basetable_date)
      AND channel_id = (SELECT channel_id FROM the_channel_constants)
  )
  -- The same event can fire twice: once with a normal extension_version and once
  -- with extension_version = 'N/A'. Collapse such pairs to one row per
  -- (time, extension, channel): 'N/A' is mapped to '0.0.0' and MAX keeps the
  -- greater string (presumably the real version -- assumes real versions sort above '0.0.0').
  , install_logs_step1 AS (
    SELECT
      start_date,
      end_date,
      "time",
      extension_id,
      MAX(CASE extension_version WHEN 'N/A' THEN '0.0.0' ELSE extension_version END) AS extension_version,
      channel_id
    FROM install_logs_step0
    GROUP BY start_date, end_date, "time", extension_id, channel_id
  )
  -- Latest install timestamp per (channel, extension, version) within the window.
  , install_logs_step2 AS (
    SELECT
      start_date,
      end_date,
      extension_id,
      extension_version,
      channel_id,
      MAX("time") AS last_time_install
    FROM install_logs_step1
    GROUP BY start_date, end_date, extension_id, extension_version, channel_id
  )
  -- Keep, per (channel, extension), only the row carrying the most recent install.
  -- The maximum is computed across all versions, so the surviving row is the
  -- version that was installed last.
  , install_logs_step3 AS (
    SELECT
      latest.start_date,
      latest.end_date,
      latest.extension_id,
      latest.extension_version,
      latest.channel_id,
      latest.last_time_install
    FROM (
      SELECT
        s2.*,
        MAX(s2.last_time_install) OVER (
          PARTITION BY s2.start_date, s2.end_date, s2.channel_id, s2.extension_id
        ) AS max_last_time_install -- version-agnostic maximum
      FROM install_logs_step2 AS s2
    ) AS latest
    WHERE latest.last_time_install = latest.max_last_time_install
  )



  -- Raw activate events (including the anchor they were activated in) for the
  -- selected channel inside [start_date, end_date), with exact duplicates removed.
  , activate_logs_step0 AS (
    SELECT DISTINCT
      (SELECT CAST(start_date AS DATE) FROM the_basetable_date) AS start_date,
      (SELECT CAST(end_date AS DATE) FROM the_basetable_date) AS end_date,
      CAST("time" AS TIMESTAMP) AS "time",
      CAST(extension_id AS VARCHAR) AS extension_id,
      CAST(extension_version AS VARCHAR) AS extension_version,
      CAST(channel_id AS BIGINT) AS channel_id,
      CAST(extension_anchor AS VARCHAR) AS extension_anchor
    FROM tahoe.extension_activate
    WHERE extension_id IS NOT NULL
      AND extension_version IS NOT NULL
      AND "time" >= (SELECT start_date FROM the_basetable_date)
      AND "time" < (SELECT end_date FROM the_basetable_date)
      AND "date" >= (SELECT start_date FROM the_basetable_date)
      AND "date" < (SELECT end_date FROM the_basetable_date)
      AND channel_id = (SELECT channel_id FROM the_channel_constants)
  )
  -- The same event can fire twice: once with a normal extension_version and once
  -- with extension_version = 'N/A'. Collapse such pairs to one row per
  -- (time, extension, channel, anchor): 'N/A' is mapped to '0.0.0' and MAX keeps
  -- the greater string (presumably the real version -- assumes real versions sort above '0.0.0').
  , activate_logs_step1 AS (
    SELECT
      start_date,
      end_date,
      "time",
      extension_id,
      MAX(CASE extension_version WHEN 'N/A' THEN '0.0.0' ELSE extension_version END) AS extension_version,
      channel_id,
      extension_anchor
    FROM activate_logs_step0
    GROUP BY start_date, end_date, "time", extension_id, channel_id, extension_anchor
  )
  -- Latest activate timestamp per (channel, extension, version, anchor) within the window.
  , activate_logs_step2 AS (
    SELECT
      start_date,
      end_date,
      extension_id,
      extension_version,
      channel_id,
      extension_anchor,
      MAX("time") AS last_time_activate
    FROM activate_logs_step1
    GROUP BY start_date, end_date, extension_id, extension_version, channel_id, extension_anchor
  )
  -- Keep, per (channel, extension), only the row(s) carrying the most recent
  -- activation (channels may activate/reactivate several times in one day; only
  -- the last one matters). The maximum ignores version and anchor, so rows that
  -- tie on the latest timestamp with different anchors all survive.
  , activate_logs_step3 AS (
    SELECT
      latest.start_date,
      latest.end_date,
      latest.extension_id,
      latest.extension_version,
      latest.channel_id,
      latest.extension_anchor,
      latest.last_time_activate
    FROM (
      SELECT
        s2.*,
        MAX(s2.last_time_activate) OVER (
          PARTITION BY s2.start_date, s2.end_date, s2.channel_id, s2.extension_id
        ) AS max_last_time_activate -- version-agnostic maximum
      FROM activate_logs_step2 AS s2
    ) AS latest
    WHERE latest.last_time_activate = latest.max_last_time_activate
  )
  -- Back-fill a missing anchor for activations that happened before 2018-06-19:
  -- component extensions did not exist then, so the anchor recorded for the
  -- Released extension in the historical table can be used. The date test lives in
  -- the ON clause, so later activations simply get no historical match.
  , activate_logs_step4 AS (
    SELECT
      act.start_date,
      act.end_date,
      act.extension_id,
      act.extension_version,
      act.channel_id,
      COALESCE(act.extension_anchor, hist.extension_anchor) AS extension_anchor,
      act.last_time_activate
    FROM activate_logs_step3 AS act
    LEFT JOIN analysis.dsds_281_historical_extensions AS hist
      ON hist.extension_id = act.extension_id
      AND hist.extension_state = 'Released'
      AND act.last_time_activate < CAST('2018-06-19' AS TIMESTAMP)
  )













  -- Raw anchor-changed events inside [start_date, end_date), with exact duplicates
  -- removed. Unlike the other raw-log CTEs this one keeps rows with a NULL
  -- extension_version, does not cast channel_id, and does not filter by channel.
  -- NOTE(review): the channel filter is disabled, so every channel is scanned here;
  -- rows for other channels are only discarded by the later join on channel_id.
  -- Confirm this is intentional before re-enabling the predicate.
  , anchor_changed_logs_step0 AS (
    SELECT DISTINCT
      (SELECT CAST(start_date AS DATE) FROM the_basetable_date) AS start_date,
      (SELECT CAST(end_date AS DATE) FROM the_basetable_date) AS end_date,
      CAST("time" AS TIMESTAMP) AS "time",
      CAST(extension_id AS VARCHAR) AS extension_id,
      CAST(extension_version AS VARCHAR) AS extension_version,
      channel_id,
      CAST(extension_anchor_new AS VARCHAR) AS extension_anchor_new
    FROM tahoe.extension_anchor_changed
    WHERE extension_id IS NOT NULL
      -- AND extension_version IS NOT NULL
      AND "time" >= (SELECT start_date FROM the_basetable_date)
      AND "time" < (SELECT end_date FROM the_basetable_date)
      AND "date" >= (SELECT start_date FROM the_basetable_date)
      AND "date" < (SELECT end_date FROM the_basetable_date)
      -- AND channel_id = (SELECT channel_id FROM the_channel_constants)
  )
  -- The same event can fire twice: once with a normal extension_version and once
  -- with extension_version = 'N/A'. Collapse such pairs to one row per
  -- (time, extension, channel, new anchor): 'N/A' is mapped to '0.0.0' and MAX
  -- keeps the greater string (presumably the real version -- assumes real versions sort above '0.0.0').
  , anchor_changed_logs_step1 AS (
    SELECT
      start_date,
      end_date,
      "time",
      extension_id,
      MAX(CASE extension_version WHEN 'N/A' THEN '0.0.0' ELSE extension_version END) AS extension_version,
      channel_id,
      extension_anchor_new
    FROM anchor_changed_logs_step0
    GROUP BY start_date, end_date, "time", extension_id, channel_id, extension_anchor_new
  )
  -- Latest anchor-changed timestamp per (channel, extension, version, new anchor) within the window.
  , anchor_changed_logs_step2 AS (
    SELECT
      start_date,
      end_date,
      extension_id,
      extension_version,
      channel_id,
      extension_anchor_new,
      MAX("time") AS last_time_anchor_changed
    FROM anchor_changed_logs_step1
    GROUP BY start_date, end_date, extension_id, extension_version, channel_id, extension_anchor_new
  )
  -- Keep, per (channel, extension), only the row(s) carrying the most recent anchor
  -- change. The same timestamp can exist for one extension_id with different
  -- anchors, so each surviving row is tagged with a random seed that the next step
  -- uses to break such ties.
  , anchor_changed_logs_step3 AS (
    SELECT
      latest.start_date,
      latest.end_date,
      latest.extension_id,
      latest.extension_version,
      latest.channel_id,
      latest.extension_anchor_new,
      latest.last_time_anchor_changed,
      RANDOM() AS the_random_seed
    FROM (
      SELECT
        s2.*,
        MAX(s2.last_time_anchor_changed) OVER (
          PARTITION BY s2.start_date, s2.end_date, s2.channel_id, s2.extension_id
        ) AS max_last_time_anchor_changed -- version-agnostic maximum
      FROM anchor_changed_logs_step2 AS s2
    ) AS latest
    WHERE latest.last_time_anchor_changed = latest.max_last_time_anchor_changed
  )
  -- Reduce the tied "most recent anchor change" rows to exactly one row per
  -- (channel, extension), chosen at random via the_random_seed.
  --
  -- The partition deliberately excludes extension_version: the previous step picks
  -- the latest timestamp regardless of version, and the downstream join matches on
  -- (channel_id, extension_id) only, so two tied rows that differ in version must
  -- still collapse to one row or they would duplicate every joined record.
  -- ROW_NUMBER() (rather than RANK()) guarantees a single winner even if two rows
  -- happen to receive the same random seed.
  , anchor_changed_logs_step4 AS (
    SELECT *
    FROM (
      SELECT
        *,
        ROW_NUMBER() OVER (
          PARTITION BY start_date, end_date, channel_id, extension_id
          ORDER BY the_random_seed DESC
        ) AS the_random_seed_rank
      FROM anchor_changed_logs_step3
    ) AS ranked
    WHERE the_random_seed_rank = 1
  )

















  -- Combine the latest install and the latest activation per (channel, extension).
  -- A FULL OUTER JOIN is used so that extensions seen in only one of the two logs
  -- are kept; shared attributes prefer the install side. extension_version is not
  -- part of the join key. Duplicate result rows are removed.
  , install_activate_logs_step0 AS (
    SELECT DISTINCT
      COALESCE(install.start_date, activate.start_date) AS start_date,
      COALESCE(install.end_date, activate.end_date) AS end_date,
      -- the day being reported is the one that ends at end_date
      CAST(COALESCE(install.end_date, activate.end_date) - INTERVAL '1 day' AS DATE) AS the_day,
      COALESCE(install.extension_id, activate.extension_id) AS extension_id,
      COALESCE(install.extension_version, activate.extension_version) AS extension_version,
      COALESCE(install.channel_id, activate.channel_id) AS channel_id,
      activate.extension_anchor,
      install.last_time_install AS time_most_recent_install,
      activate.last_time_activate AS time_most_recent_activate,
      -- which of the two logs contributed to this row
      CASE
        WHEN install.last_time_install IS NOT NULL AND activate.last_time_activate IS NOT NULL THEN 'activate_and_install'
        WHEN install.last_time_install IS NOT NULL THEN 'install_only'
        WHEN activate.last_time_activate IS NOT NULL THEN 'activate_only'
        ELSE 'error_both_null'
      END AS flag_install_activate_status
    FROM install_logs_step3 AS install
    FULL OUTER JOIN activate_logs_step4 AS activate
      ON activate.channel_id = install.channel_id
      AND activate.extension_id = install.extension_id
  )
  -- Attach the most recent anchor-changed event (timestamp and new anchor), if any,
  -- to each install/activate row, matching on channel and extension.
  , install_activate_logs_step1 AS (
    SELECT
      ia.*,
      chg.last_time_anchor_changed AS time_most_recent_anchor_changed,
      chg.extension_anchor_new
    FROM install_activate_logs_step0 AS ia
    LEFT JOIN anchor_changed_logs_step4 AS chg
      ON chg.channel_id = ia.channel_id
      AND chg.extension_id = ia.extension_id
  )
  -- Decide which anchor is current: when an anchor-changed event happened strictly
  -- after the latest activation, the new anchor wins; otherwise the anchor from the
  -- activation is kept. If either timestamp is NULL the comparison is not true, so
  -- the activation anchor is kept in that case as well.
  , install_activate_logs_step2 AS (
    SELECT
      base.start_date,
      base.end_date,
      base.the_day,
      base.extension_id,
      base.extension_version,
      base.channel_id,
      CASE
        WHEN base.time_most_recent_anchor_changed > base.time_most_recent_activate
          THEN base.extension_anchor_new
        ELSE base.extension_anchor
      END AS extension_anchor,
      base.time_most_recent_install,
      base.time_most_recent_activate,
      base.time_most_recent_anchor_changed,
      base.flag_install_activate_status
    FROM install_activate_logs_step1 AS base
  )
  -- Replace extension_version with the version recorded for that extension and day
  -- in the extension_daily_version rollup. When the rollup has no record, keep the
  -- version from the install/activate logs (which may be the '0.0.0' placeholder).
  , install_activate_logs_step3 AS (
    SELECT
      ia.start_date,
      ia.end_date,
      ia.the_day,
      ia.extension_id,
      COALESCE(daily.extension_version, ia.extension_version) AS extension_version,
      ia.channel_id,
      ia.extension_anchor,
      ia.time_most_recent_install,
      ia.time_most_recent_activate,
      ia.time_most_recent_anchor_changed,
      ia.flag_install_activate_status
    FROM install_activate_logs_step2 AS ia
    LEFT JOIN analysis.dsds_281_extension_daily_version AS daily
      ON daily.the_day = ia.the_day
      AND daily.extension_id = ia.extension_id
  )
  -- Infer the anchor from the released version's capability flags where that is
  -- unambiguous: if exactly one of panel / overlay / component is enabled (and the
  -- other two are explicitly disabled), that anchor is used regardless of what the
  -- logs said. Otherwise the anchor derived from the logs is kept, with the literal
  -- placeholder 'NULL_EXTENSION_ANCHOR' substituted when it is missing.
  , install_activate_logs_step4 AS (
    SELECT
      ia.start_date,
      ia.end_date,
      ia.the_day,
      ia.extension_id,
      ia.extension_version,
      ia.channel_id,
      CASE
        WHEN meta.is_panel_enabled IS TRUE
          AND meta.is_overlay_enabled IS FALSE
          AND meta.is_component_enabled IS FALSE
          THEN 'panel' -- only possible as panel
        WHEN meta.is_overlay_enabled IS TRUE
          AND meta.is_panel_enabled IS FALSE
          AND meta.is_component_enabled IS FALSE
          THEN 'video_overlay' -- only possible as overlay
        WHEN meta.is_component_enabled IS TRUE
          AND meta.is_overlay_enabled IS FALSE
          AND meta.is_panel_enabled IS FALSE
          THEN 'component' -- only possible as component
        ELSE COALESCE(ia.extension_anchor, 'NULL_EXTENSION_ANCHOR') -- not obvious from the flags
      END AS extension_anchor,
      ia.time_most_recent_install,
      ia.time_most_recent_activate,
      ia.time_most_recent_anchor_changed,
      ia.flag_install_activate_status
    FROM install_activate_logs_step3 AS ia
    LEFT JOIN all_released_extension_versions AS meta
      ON meta.extension_id = ia.extension_id
      AND meta.extension_version = ia.extension_version
  )



  -- Raw deactivate events for the selected channel inside [start_date, end_date),
  -- with exact duplicate rows removed.
  , deactivate_logs_step0 AS (
    SELECT DISTINCT
      (SELECT CAST(start_date AS DATE) FROM the_basetable_date) AS start_date,
      (SELECT CAST(end_date AS DATE) FROM the_basetable_date) AS end_date,
      CAST("time" AS TIMESTAMP) AS "time",
      CAST(extension_id AS VARCHAR) AS extension_id,
      CAST(extension_version AS VARCHAR) AS extension_version,
      CAST(channel_id AS BIGINT) AS channel_id
    FROM tahoe.extension_deactivate
    WHERE extension_id IS NOT NULL
      AND extension_version IS NOT NULL
      AND "time" >= (SELECT start_date FROM the_basetable_date)
      AND "time" < (SELECT end_date FROM the_basetable_date)
      AND "date" >= (SELECT start_date FROM the_basetable_date)
      AND "date" < (SELECT end_date FROM the_basetable_date)
      AND channel_id = (SELECT channel_id FROM the_channel_constants)
  )
  -- The same event can fire twice: once with a normal extension_version and once
  -- with extension_version = 'N/A'. Collapse such pairs to one row per
  -- (time, extension, channel): 'N/A' is mapped to '0.0.0' and MAX keeps the
  -- greater string (presumably the real version -- assumes real versions sort above '0.0.0').
  , deactivate_logs_step1 AS (
    SELECT
      start_date,
      end_date,
      "time",
      extension_id,
      MAX(CASE extension_version WHEN 'N/A' THEN '0.0.0' ELSE extension_version END) AS extension_version,
      channel_id
    FROM deactivate_logs_step0
    GROUP BY start_date, end_date, "time", extension_id, channel_id
  )
  -- Latest deactivate timestamp per (channel, extension, version) within the window.
  , deactivate_logs_step2 AS (
    SELECT
      start_date,
      end_date,
      extension_id,
      extension_version,
      channel_id,
      MAX("time") AS last_time_deactivate
    FROM deactivate_logs_step1
    GROUP BY start_date, end_date, extension_id, extension_version, channel_id
  )
  -- Keep, per (channel, extension), only the row carrying the most recent
  -- deactivation; the maximum is computed across all versions.
  , deactivate_logs_step3 AS (
    SELECT
      latest.start_date,
      latest.end_date,
      latest.extension_id,
      latest.extension_version,
      latest.channel_id,
      latest.last_time_deactivate
    FROM (
      SELECT
        s2.*,
        MAX(s2.last_time_deactivate) OVER (
          PARTITION BY s2.start_date, s2.end_date, s2.channel_id, s2.extension_id
        ) AS max_last_time_deactivate -- version-agnostic maximum
      FROM deactivate_logs_step2 AS s2
    ) AS latest
    WHERE latest.last_time_deactivate = latest.max_last_time_deactivate
  )







  -- Raw uninstall events for the selected channel inside [start_date, end_date),
  -- with exact duplicate rows removed.
  , uninstall_logs_step0 AS (
    SELECT DISTINCT
      (SELECT CAST(start_date AS DATE) FROM the_basetable_date) AS start_date,
      (SELECT CAST(end_date AS DATE) FROM the_basetable_date) AS end_date,
      CAST("time" AS TIMESTAMP) AS "time",
      CAST(extension_id AS VARCHAR) AS extension_id,
      CAST(extension_version AS VARCHAR) AS extension_version,
      CAST(channel_id AS BIGINT) AS channel_id
    FROM tahoe.extension_uninstall
    WHERE extension_id IS NOT NULL
      AND extension_version IS NOT NULL
      AND "time" >= (SELECT start_date FROM the_basetable_date)
      AND "time" < (SELECT end_date FROM the_basetable_date)
      AND "date" >= (SELECT start_date FROM the_basetable_date)
      AND "date" < (SELECT end_date FROM the_basetable_date)
      AND channel_id = (SELECT channel_id FROM the_channel_constants)
  )
  -- The same event can fire twice: once with a normal extension_version and once
  -- with extension_version = 'N/A'. Collapse such pairs to one row per
  -- (time, extension, channel): 'N/A' is mapped to '0.0.0' and MAX keeps the
  -- greater string (presumably the real version -- assumes real versions sort above '0.0.0').
  , uninstall_logs_step1 AS (
    SELECT
      start_date,
      end_date,
      "time",
      extension_id,
      MAX(CASE extension_version WHEN 'N/A' THEN '0.0.0' ELSE extension_version END) AS extension_version,
      channel_id
    FROM uninstall_logs_step0
    GROUP BY start_date, end_date, "time", extension_id, channel_id
  )
  -- Latest uninstall timestamp per (channel, extension, version) within the window.
  , uninstall_logs_step2 AS (
    SELECT
      start_date,
      end_date,
      extension_id,
      extension_version,
      channel_id,
      MAX("time") AS last_time_uninstall
    FROM uninstall_logs_step1
    GROUP BY start_date, end_date, extension_id, extension_version, channel_id
  )
  -- Keep, per (channel, extension), only the row carrying the most recent
  -- uninstall; the maximum is computed across all versions.
  , uninstall_logs_step3 AS (
    SELECT
      latest.start_date,
      latest.end_date,
      latest.extension_id,
      latest.extension_version,
      latest.channel_id,
      latest.last_time_uninstall
    FROM (
      SELECT
        s2.*,
        MAX(s2.last_time_uninstall) OVER (
          PARTITION BY s2.start_date, s2.end_date, s2.channel_id, s2.extension_id
        ) AS max_last_time_uninstall -- version-agnostic maximum
      FROM uninstall_logs_step2 AS s2
    ) AS latest
    WHERE latest.last_time_uninstall = latest.max_last_time_uninstall
  )




  -- Raw delete events inside [start_date, end_date), with exact duplicates removed.
  -- These rows are keyed by user_id rather than channel_id and are not restricted
  -- to the selected channel. Meant for matching on a real extension_version (i.e.
  -- not the 'N/A' value that other CTEs map to '0.0.0').
  , delete_logs_v1 AS (
    SELECT DISTINCT
      (SELECT CAST(start_date AS DATE) FROM the_basetable_date) AS start_date,
      (SELECT CAST(end_date AS DATE) FROM the_basetable_date) AS end_date,
      CAST("time" AS TIMESTAMP) AS "time",
      CAST(extension_id AS VARCHAR) AS extension_id,
      CAST(extension_version AS VARCHAR) AS extension_version,
      CAST(user_id AS BIGINT) AS user_id
    FROM tahoe.extension_delete
    WHERE extension_id IS NOT NULL
      AND extension_version IS NOT NULL
      AND "time" >= (SELECT start_date FROM the_basetable_date)
      AND "time" < (SELECT end_date FROM the_basetable_date)
      AND "date" >= (SELECT start_date FROM the_basetable_date)
      AND "date" < (SELECT end_date FROM the_basetable_date)
      -- the table also holds non-'Deleted' rows (e.g. op.gg league of legends extensions on date=2017-11-10)
      AND state = 'Deleted'
  )























  -- Attach the most recent deactivate, uninstall and delete timestamps to every
  -- install/activate row.
  --   * deactivate / uninstall: matched on (channel_id, extension_id).
  --   * delete: matched on (extension_id, extension_version) and only when the
  --     row has a real version (not the '0.0.0' placeholder used for 'N/A').
  --     delete_logs_v1 holds one row per delete event, so it is first reduced to
  --     the latest delete per extension version; joining the raw events would
  --     emit one output row per delete event and could yield contradictory
  --     installed/activated flags for the same extension.
  -- Rows whose version is '0.0.0' currently get no delete timestamp at all (a
  -- version-agnostic delete lookup was sketched in the original but never defined).
  , all_join_logs_step0 AS (
    SELECT base.start_date, base.end_date, base.the_day
      , base.extension_id, base.extension_version, base.extension_anchor, base.channel_id
      , base.flag_install_activate_status, base.time_most_recent_install, base.time_most_recent_activate, base.time_most_recent_anchor_changed
      , deact.last_time_deactivate AS time_most_recent_deactivate, uninstall.last_time_uninstall AS time_most_recent_uninstall
      , delete_v1.last_time_delete AS time_most_recent_delete
    FROM install_activate_logs_step4 AS base
    LEFT JOIN deactivate_logs_step3 AS deact ON base.channel_id = deact.channel_id AND base.extension_id = deact.extension_id
    LEFT JOIN uninstall_logs_step3 AS uninstall ON base.channel_id = uninstall.channel_id AND base.extension_id = uninstall.extension_id
    LEFT JOIN (
      -- latest delete event per extension version
      SELECT extension_id, extension_version, MAX("time") AS last_time_delete
      FROM delete_logs_v1
      GROUP BY extension_id, extension_version
    ) AS delete_v1
      ON base.extension_version != '0.0.0' AND base.extension_id = delete_v1.extension_id AND base.extension_version = delete_v1.extension_version
  )
  -- Derive bool_extension_activated from the event timestamps (computed first
  -- because the installed flag in the next step depends on it):
  --   * no activation recorded                                   -> not activated
  --   * a deactivate, uninstall or delete at/after the activation -> not activated
  --   * otherwise                                                 -> activated
  -- A NULL timestamp never satisfies ">=", so missing events do not deactivate.
  , all_join_logs_step1 AS (
    SELECT
      the_day,
      extension_id,
      extension_version,
      extension_anchor,
      channel_id,
      flag_install_activate_status,
      time_most_recent_install,
      time_most_recent_activate,
      time_most_recent_anchor_changed,
      time_most_recent_deactivate,
      time_most_recent_uninstall,
      time_most_recent_delete,
      CASE
        WHEN time_most_recent_activate IS NULL THEN FALSE
        WHEN time_most_recent_deactivate >= time_most_recent_activate
          OR time_most_recent_uninstall >= time_most_recent_activate
          OR time_most_recent_delete >= time_most_recent_activate
          THEN FALSE
        ELSE TRUE
      END AS bool_extension_activated
    FROM all_join_logs_step0
  )
  -- Derive bool_extension_installed and blank out the anchor of inactive extensions.
  --   * activated                                        -> installed (even if the
  --     install timestamp is missing)
  --   * no install recorded                              -> not installed
  --   * an uninstall or delete at/after the last install -> not installed
  --   * otherwise                                        -> installed
  -- extension_anchor is only reported for activated extensions, for data consistency.
  , all_join_logs_step2 AS (
    SELECT
      the_day,
      extension_id,
      extension_version,
      CASE
        WHEN bool_extension_activated IS NOT FALSE THEN extension_anchor
      END AS extension_anchor,
      channel_id,
      flag_install_activate_status,
      time_most_recent_install,
      time_most_recent_activate,
      time_most_recent_anchor_changed,
      time_most_recent_deactivate,
      time_most_recent_uninstall,
      time_most_recent_delete,
      bool_extension_activated,
      CASE
        WHEN bool_extension_activated IS TRUE THEN TRUE
        WHEN time_most_recent_install IS NULL THEN FALSE
        WHEN time_most_recent_uninstall >= time_most_recent_install
          OR time_most_recent_delete >= time_most_recent_install
          THEN FALSE
        ELSE TRUE
      END AS bool_extension_installed
    FROM all_join_logs_step1
  )
  -- Final shape: columns in schema order, duplicates removed, and only extensions
  -- that are currently installed or activated and have a known version.
  , all_join_logs_step3 AS (
    SELECT DISTINCT
      src.the_day,
      src.extension_id,
      src.extension_version,
      src.extension_anchor,
      src.channel_id,
      src.flag_install_activate_status,
      src.time_most_recent_install,
      src.time_most_recent_activate,
      src.time_most_recent_anchor_changed,
      src.time_most_recent_deactivate,
      src.time_most_recent_uninstall,
      src.time_most_recent_delete,
      src.bool_extension_installed,
      src.bool_extension_activated
    FROM all_join_logs_step2 AS src
    WHERE src.extension_version IS NOT NULL
      AND (src.bool_extension_installed IS TRUE OR src.bool_extension_activated IS TRUE)
  )





  -- Stable name for the finished result, so the diagnostic queries at the end of
  -- the file can be swapped in without touching the pipeline above.
  , final_data AS (
    SELECT
      the_day,
      extension_id,
      extension_version,
      extension_anchor,
      channel_id,
      flag_install_activate_status,
      time_most_recent_install,
      time_most_recent_activate,
      time_most_recent_anchor_changed,
      time_most_recent_deactivate,
      time_most_recent_uninstall,
      time_most_recent_delete,
      bool_extension_installed,
      bool_extension_activated
    FROM all_join_logs_step3
  )

  -- Emit the result ordered by its first seven columns.
  SELECT *
  FROM final_data
  ORDER BY
    the_day,
    extension_id,
    extension_version,
    extension_anchor,
    channel_id,
    flag_install_activate_status,
    time_most_recent_install;



  -- --CHECK TO SEE IF THERE ARE DUPLICATES
  -- , weird_channel_id AS (
  --   SELECT *
  --   FROM (
  --     SELECT channel_id, extension_id, the_day, COUNT(1) AS rows
  --     FROM final_data
  --     GROUP BY 1,2,3
  --   )
  --   WHERE rows>1
  -- )
  -- SELECT * FROM final_data
  -- WHERE channel_id IN (SELECT channel_id FROM weird_channel_id) AND extension_id IN (SELECT extension_id FROM weird_channel_id)
  -- ORDER BY channel_id, extension_id, time_most_recent_activate
  -- ;
  --


-- , weird_channel_id AS (
--   SELECT *
--   FROM (
--     SELECT channel_id, extension_id, start_date, end_date, COUNT(1) AS rows
--     FROM final_data
--     GROUP BY 1,2,3,4
--   )
--   WHERE rows>1
-- )
--
-- SELECT * FROM final_data WHERE channel_id IN (SELECT channel_id FROM weird_channel_id)
-- ORDER BY channel_id, time_most_recent_activate


--
-- -- CHECK SUMMARY DATA
-- SELECT start_date, end_date, flag_install_activate_status, extension_id, extension_anchor
-- , COUNT(1) AS rows, COUNT(DISTINCT channel_id) AS channels--, COUNT(DISTINCT extension_id+extension_version) AS extension_and_versions
-- , MIN(time_most_recent_install) AS min_install, MAX(time_most_recent_install) AS max_install
-- , MIN(time_most_recent_activate) AS min_activate, MAX(time_most_recent_activate) AS max_activate
-- , MIN(time_most_recent_deactivate) AS min_deactivate, MAX(time_most_recent_deactivate) AS max_deactivate
-- , MIN(time_most_recent_uninstall) AS min_uninstall, MAX(time_most_recent_uninstall) AS max_uninstall
-- , MIN(time_most_recent_delete) AS min_delete, MAX(time_most_recent_delete) AS max_delete
-- --, MIN(end_date) AS min_end, MAX(end_date) AS max_end
-- FROM final_data
-- GROUP BY 1,2,3,4,5
-- ORDER BY 1,2,3,4,5
