-- FOR DETAILS GO TO
-- https://jira.twitch.com/browse/DSDS-343
-- Rebuild the scratch table from scratch on every run.
DROP TABLE IF EXISTS analysis.dsds_343_test_mw_self_join;

-- Output: one row per (channel, competitor) pair that shares at least one
-- device, with the summed mw each side received from those shared devices.
-- The pair (channel, channel) is included, because every device row joins to
-- itself in the self-join below.
CREATE TABLE analysis.dsds_343_test_mw_self_join AS

WITH
--tahoe_analysis.dsds_343_rainbow6_similar_streamers
-- Hand-picked channels (URL + numeric id).
-- NOTE(review): no later CTE in this statement reads the_constants, so it
-- currently has no effect on the result. Also note `channel` here is a full
-- URL; confirm the format of rollups `channel` before joining on it.
the_constants AS (
  SELECT 'https://www.twitch.tv/lil_lexi' AS channel, 49415591 AS channel_id
  UNION ALL SELECT 'https://www.twitch.tv/zironicdk', 29661191
  UNION ALL SELECT 'https://www.twitch.tv/tangyd', 38030324
  UNION ALL SELECT 'https://www.twitch.tv/annemunition', 51533859
  UNION ALL SELECT 'https://www.twitch.tv/shortyyguy', 20420989
  UNION ALL SELECT 'https://www.twitch.tv/MacieJay', 122320848
  UNION ALL SELECT 'https://www.twitch.tv/tatted', 89422807
)

-- Total mw per (device_id, channel) over the window
-- [start of current week - 30 days, start of current week).
-- "Current week" is evaluated on SYSDATE shifted back 7 hours
-- (presumably a UTC -> Pacific adjustment; TODO confirm).
, daily_content_by_device AS (
  SELECT
    device_id,
    channel,
    SUM(mw) AS mw
  FROM rollups.daily_content_watched_by_device
  WHERE
    day >= DATE_TRUNC('week', (SYSDATE - INTERVAL '7 hours')) - INTERVAL '30 days'
    AND day < DATE_TRUNC('week', (SYSDATE - INTERVAL '7 hours'))
    -- Drop placeholder device ids. NOT IN also discards NULL device_id rows,
    -- because a NULL comparison is never true.
    AND device_id NOT IN ('0000000000000000', '00000000000000000000000000000000', '(null)')
    -- Extra safety: device ids shorter than 16 characters are treated as
    -- suspicious. May be overkill for historical data.
    AND LEN(device_id) >= 16
    -- Count live minutes only.
    -- https://git-aws.internal.justin.tv/stats/udf/blob/master/f_sql_get_video_product_type.sql
    AND f_sql_get_video_product_type(broadcaster_software, vod_type, content_mode, live) = 'live'
  GROUP BY
    device_id,
    channel
)

-- Pair every channel a device watched with every channel (including the same
-- one) that device watched, then total both sides per channel pair.
, mw_table AS (
  SELECT
    a.channel AS channel,
    b.channel AS competitor,
    SUM(a.mw) AS mw_a,  -- mw on `channel` from devices that also watched `competitor`
    SUM(b.mw) AS mw_b,  -- mw on `competitor` from devices that also watched `channel`
    SUM(b.mw) AS mw     -- same value as mw_b; kept so the table's columns do not change
  FROM daily_content_by_device AS a
  -- Both sides are the same CTE and device_id is never NULL here, so every
  -- left row matches at least itself; the LEFT JOIN never yields NULL b columns.
  LEFT JOIN daily_content_by_device AS b
    ON b.device_id = a.device_id
  GROUP BY
    a.channel,
    b.channel
)

-- Columns are listed explicitly so the persisted table's schema is fixed here
-- and does not silently follow future edits to mw_table.
SELECT
  channel,
  competitor,
  mw_a,
  mw_b,
  mw
FROM mw_table
-- WHERE mw_a <> mw_b  -- disabled debugging filter
;

--
-- -- with cdm as (
-- --   select
-- --     device_id,
-- --     channel,
-- --     sum(mw) as mw
-- --   from rollups.daily_content_watched_by_device
-- --   where day >= sysdate - 30
-- --     and device_id not in ('(null)')
-- --   group by 1, 2
-- -- )
--
-- , mw_table as (
--   select
--     a.channel as channel,
--     b.channel as competitor,
--     sum(b.mw) as mw
--   from cdm a
--   left join cdm b
--     on b.device_id = a.device_id
--   group by 1, 2
-- )
--
-- , mw_totals as (
--   select
--     channel,
--     mw
--   from mw_table
--   where channel = competitor
-- )
--
-- , agg as (
--   select
--     a.channel,
--     a.competitor,
--     a.mw as mw_a_to_b,
--     b.mw as mw_b_to_a,
--     c.mw as a_total_mw,
--     d.mw as b_total_mw
--   from mw_table a
--   left join mw_table b
--     on b.competitor = a.channel
--     and b.channel = a.competitor
--   left join mw_totals c
--     on c.channel = a.channel
--   left join mw_totals d
--     on d.channel = a.competitor
-- )
--
-- , res as (
--   select
--     channel,
--     competitor,
--     (mw_a_to_b + mw_b_to_a)::float / (a_total_mw + b_total_mw) as jaccard_sim
--   from agg
--   where channel != competitor
-- )
--
-- , c_agg as (
--   select
--     channel,
--     competitor,
--     jaccard_sim,
--     row_number() over (partition by channel order by jaccard_sim desc) AS rank
--   from res
-- )
--
-- , gdm as (
--   select
--     device_id,
--     channel,
--     game,
--     sum(mw) as mw
--   from rollups.daily_content_watched_by_device
--   where day >= sysdate - 30
--   group by 1, 2,3
-- )
--
-- , gmw_list as (
--   select
--     cdm.channel,
--     gdm.game,
--     sum(gdm.mw) as mw
--   from cdm
--   join gdm
--     on gdm.device_id = cdm.device_id and gdm.channel!=cdm.channel
--   group by 1, 2
-- )
--
-- , game_mw as (
--   select
--     channel,
--     sum(mw) as game_mw
--   from gmw_list
--   group by 1
-- )
--
-- , g_agg as (
--   select
--     mwl.channel,
--     game,
--     mw,
--     game_mw,
--     mw::float / game_mw as pct,
--     row_number() over (partition by mwl.channel order by mw desc) AS rank
--   from gmw_list mwl
--   left join game_mw gmw
--     on gmw.channel = mwl.channel
-- )
--
--
--
-- select
--   competitor,
--   jaccard_sim,
--   game,
--   g_agg.pct as prop_game_mw
-- from c_agg
-- left join g_agg
--   on g_agg.channel = c_agg.channel
--   and g_agg.rank = c_agg.rank
-- where c_agg.rank <= 20
--   and g_agg.rank <= 20
--   and c_agg.channel in (
-- 'annemunition'
-- , 'lirik'
-- , 'grandpoobear'
-- , 'ninja'
-- , 'mrchowderclam'
-- , 'shroud'
-- , 'witwix'
-- , 'pokercentral'
-- , 'cohhcarnage'
--  )
--   and g_agg.channel = ''
