#!/usr/bin/env python
"""
This is a script checking if a branch can be merged into mainline and published
"""

import json
import logging
import gzip
import sys
from datetime import datetime, timedelta, timezone

import boto3
from botocore.exceptions import ClientError

import publish
import build


def get_schema_configs_json():
    """
    Fetch the current bp schema config from s3 and return it as parsed JSON.

    The gzip-compressed object 'prod-schema-configs.json.gz' is read from the
    'science-blueprint-configs' bucket, decompressed and decoded with json.loads.

    Returns None (after logging) when s3 reports 'NoSuchKey'; any other
    ClientError is re-raised to the caller.
    """
    schema_object = boto3.resource('s3').Object(
        'science-blueprint-configs', 'prod-schema-configs.json.gz')
    try:
        with gzip.GzipFile(fileobj=schema_object.get()["Body"]) as compressed:
            raw_json = compressed.read()
    except ClientError as err:
        # only a missing object is tolerated; everything else is a real failure
        if err.response['Error']['Code'] != 'NoSuchKey':
            raise
        logging.info('No object found - returning empty')
        return None
    return json.loads(raw_json)


def transform_bp_state(content_json):
    """
    Get current bp state in dict format where each event name is a key and outbound names of
    each of the columns is a key in the inner dict

    content_json is an iterable of event dicts, each carrying 'EventName', 'Columns',
    'CreatedTS', 'TS' and 'UserName'; every column dict carries 'OutboundName'.
    None (what get_schema_configs_json returns when the s3 object is missing) is
    treated as "no events" and yields an empty dict instead of raising TypeError.

    Returns {event_name: {'Columns': {outbound_name: column}, 'Metadata': {...}}}.
    """
    if content_json is None:
        return {}
    return {
        event['EventName']: {
            'Columns': {column['OutboundName']: column for column in event['Columns']},
            'Metadata': {
                'CreatedTS': event['CreatedTS'],
                'TS': event['TS'],
                'UserName': event['UserName'],
            },
        }
        for event in content_json
    }


def get_desired_state(events, groups, fields):
    """
    Build the desired state keyed by event name, with each event's columns keyed
    by their outbound name.

    The 'prod' blueprint events are fetched through publish and combined with the given
    events, groups and fields via publish.convert_events_to_desired_state; the
    'eventEndStates' of that result are then re-keyed. 'Metadata' is always an
    empty dict for desired events.
    """
    blueprint_events = publish.get_blueprint_ssr_owned_events('prod', boto3.resource('s3'))
    desired = publish.convert_events_to_desired_state(events, groups, fields, blueprint_events)
    return {
        end_state['eventName']: {
            'Columns': {col['outboundName']: col for col in end_state['columns']},
            'Metadata': {},
        }
        for end_state in desired['eventEndStates']
    }


def is_different(event, curr_columns, desired_columns):
    """
    returns true if curr_columns are different from desired_columns

    event is only used for log messages. curr_columns maps outbound name to a column
    dict with a 'ColumnCreationOptions' key; desired_columns maps outbound name to a
    column dict with a 'columnCreationOptions' key.

    The columns differ when the sets of column names differ, or when any column
    present on both sides has different creation options. Every difference found is
    logged at info level.
    """
    curr_column_names = set(curr_columns)
    desired_column_names = set(desired_columns)
    if curr_column_names != desired_column_names:
        # column adds or drops count as diff/op
        logging.info('%s proposed columns are different dropping columns %s adding columns %s',
                     event, curr_column_names - desired_column_names,
                     desired_column_names - curr_column_names)
        return True

    # the name sets are equal here, so every current column also exists in desired_columns
    different = False
    for column in curr_columns:
        curr_options = curr_columns[column]['ColumnCreationOptions']
        desired_options = desired_columns[column]['columnCreationOptions']
        if curr_options != desired_options:
            # adjacent literals instead of a backslash continuation inside the string, which
            # would embed the source indentation in the logged message
            logging.info('event %s column %s ColumnCreationOptions are different '
                         'current size: %s proposed size: %s',
                         event, column, curr_options, desired_options)
            different = True
    return different


def get_events_to_change(curr_state, end_state):
    """
    returns, for every event present in both states, how long ago it was created and
    whether its columns would change to converge to the desired state

    The result maps event name to a dict with:
      'TimeSinceCreation': timedelta between now and the event's 'CreatedTS'
      'ChangesProposed':   True, present only when is_different reports a column diff

    'CreatedTS' carries a trailing 'Z', so it is interpreted as UTC and compared against
    the current UTC time; comparing it with naive local time would skew the age by the
    host's UTC offset.
    """
    now = datetime.now(timezone.utc)
    changes_proposed = {}
    for event, info in curr_state.items():
        if event not in end_state:
            continue
        created_ts = datetime.strptime(
            info['Metadata']['CreatedTS'], '%Y-%m-%dT%H:%M:%S.%fZ'
        ).replace(tzinfo=timezone.utc)
        changes_proposed[event] = {
            'TimeSinceCreation': now - created_ts,
        }
        if is_different(event, info['Columns'], end_state[event]['Columns']):
            changes_proposed[event]['ChangesProposed'] = True
    return changes_proposed


def get_old_event_changes_proposed(curr_state, end_state):
    """
    Count the events whose 'TimeSinceCreation' is more than 30 whole days and that
    have column changes proposed, as reported by get_events_to_change.
    """
    proposals = get_events_to_change(curr_state, end_state)
    return sum(
        1
        for proposal in proposals.values()
        if proposal['TimeSinceCreation'].days > 30 and proposal.get('ChangesProposed', False)
    )


def are_changes_valid(curr_state, end_state):
    """
        returns false if blueprint can't get from current_state
        to desired_state using valid changes

        Only events present in both states are checked. For every column present on
        both sides, the transformer, sensitivity type and inbound name must be unchanged.
        In addition, columns may not be dropped from an event whose 'UserName' metadata
        is not 'Spade Schema Registry'.

        Returns a tuple (valid, fail_reasons): valid is True when fail_reasons is empty,
        and fail_reasons is a list of human readable messages.
    """
    fail_reasons = []
    for event, info in curr_state.items():
        if event not in end_state:
            continue
        curr_columns = info['Columns']
        desired_columns = end_state[event]['Columns']

        for col, curr_column in curr_columns.items():
            if col not in desired_columns:
                continue
            desired_column = desired_columns[col]

            curr_type = curr_column['Transformer']
            desired_type = desired_column['transformer']
            if curr_type != desired_type:
                fail_reasons.append(f'event {event} column {col} type change from {curr_type} '
                                    f'to {desired_type} not allowed')

            curr_sensitivity = curr_column['SensitivityType']
            # fields where sensitivity is set to none in blueprint show up with sensitivity
            # empty in the schema_config json
            if curr_sensitivity == '':
                curr_sensitivity = None
            desired_sensitivity = desired_column['sensitivityType']
            if curr_sensitivity != desired_sensitivity:
                fail_reasons.append(f'event {event} column {col} sensitivity change from '
                                    f'{curr_sensitivity} to {desired_sensitivity} not allowed')

            curr_inbound_name = curr_column['InboundName']
            desired_inbound_name = desired_column['inboundName']
            if curr_inbound_name != desired_inbound_name:
                fail_reasons.append(f'event {event} column {col} inbound name change '
                                    f'from {curr_inbound_name} to {desired_inbound_name} not allowed')

        # if an event is being imported for the first time, the user is not allowed to
        # drop columns. The user can change column sizes, since that is not
        # a destructive operation.
        # This is checked once per event, outside the column loop, so the reason is not
        # repeated for every shared column and is still reported when no column is shared.
        columns_to_be_dropped = set(curr_columns) - set(desired_columns)
        if columns_to_be_dropped and info['Metadata']['UserName'] != 'Spade Schema Registry':
            fail_reasons.append('You cannot drop columns the first time you import an '
                                f'event. You are trying to drop columns {columns_to_be_dropped}.')

    return not fail_reasons, fail_reasons


def check_time_range_valid(change_time, invalid_range_start, invalid_range_end):
    """
    Tell whether change_time lies outside the block period.

    Returns False when change_time is between invalid_range_start and
    invalid_range_end (both bounds inclusive), True otherwise.
    """
    inside_block_period = invalid_range_start <= change_time <= invalid_range_end
    return not inside_block_period


def get_num_recently_changed_events(curr_state):
    """
    Count the events whose 'TS' metadata is newer than 15 minutes ago.

    'TS' is parsed as '%Y-%m-%dT%H:%M:%S.%fZ' and treated as UTC before being
    compared with the current UTC time.
    """
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    cutoff = datetime.now(timezone.utc) - timedelta(minutes=15)

    def modified_at(details):
        parsed = datetime.strptime(details['Metadata']['TS'], timestamp_format)
        return parsed.replace(tzinfo=timezone.utc)

    return sum(1 for details in curr_state.values() if modified_at(details) > cutoff)


def main():
    """
    main function that checks if changes proposed are valid

    Builds the desired state from the local events, groups and fields, loads the
    current state from s3 and then applies three checks:
      * every change must be valid according to are_changes_valid
      * changes to events older than 30 days plus recently changed events must not exceed 2
      * the current UTC time must not be between 04:00 and 08:00

    All failures are logged; the process exits with status 1 if any check failed.
    """
    logging.info("Starting event change validation")
    events = build.objects_by_name(build.get_events().values())
    groups = build.objects_by_name(build.get_groups().values())
    fields = build.objects_by_name(build.get_fields().values())
    desired_state = get_desired_state(events, groups, fields)
    # get_schema_configs_json returns None when the s3 object does not exist; treat that
    # as an empty current state instead of crashing while iterating None
    current_state = transform_bp_state(get_schema_configs_json() or [])
    proposed_changes = get_old_event_changes_proposed(current_state, desired_state)
    recently_changed_events = get_num_recently_changed_events(current_state)
    valid, reasons = are_changes_valid(current_state, desired_state)
    failed = False
    if not valid:
        for reason in reasons:
            logging.error('changes are invalid reason: %s', reason)
        failed = True

    msg = 'number of changes proposed + recently changed events %d > 2'
    if proposed_changes + recently_changed_events > 2:
        logging.error(msg, proposed_changes + recently_changed_events)
        failed = True

    # the block period is 04:00-08:00 UTC of the current day
    now = datetime.now(timezone.utc)
    block_period_start = now.replace(hour=4, minute=0, second=0, microsecond=0)
    block_period_end = now.replace(hour=8, minute=0, second=0, microsecond=0)

    if not check_time_range_valid(now, block_period_start, block_period_end):
        logging.error('Changes are not allowed between 4AM and 8AM UTC')
        failed = True

    logging.info("Done with event change validation")

    if failed:
        sys.exit(1)


# Run the validation only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
