#This exists in our ec2 box at /home/cronjob/googlesheets
import boto3
import json
import gspread
from collections import defaultdict
from oauth2client.service_account import ServiceAccountCredentials
import re
import psycopg2
from io import StringIO
from multiprocessing import Process
import time
import traceback
import requests
import s

def kickoff_process(link,sheet):
    """Export a single worksheet using resources created inside this call.

    Intended as a ``multiprocessing.Process`` target: the database
    connection, the S3 client and the Google Sheets client are all created
    here and handed straight to ``write_to_s3``.

    Args:
        link: URL of the Google Sheets workbook.
        sheet: Title of the worksheet within that workbook.
    """
    # The database connection comes from the local `s` module; the inline
    # psycopg2.connect(...) call that used to live here is no longer used.
    db_connection = s.getConnection()

    # S3 client used to upload the generated CSV.
    s3_client = boto3.client('s3')

    sheets_client = s.getGC()
    write_to_s3(link, sheet, sheets_client, s3_client, db_connection)


def write_to_s3(link,title,gc,s3,connection):
    """Export one worksheet to S3 as a headerless CSV, then load it into Redshift.

    The first worksheet row supplies the schema: every header cell has the
    form ``name[type]``. A type of ``string`` (any case) becomes a
    ``VARCHAR`` sized to the widest value seen in that column; any other
    type is passed through unchanged. The remaining rows are written as
    comma-separated lines after single quotes, double quotes and commas
    have been stripped from every cell.

    Args:
        link: URL of the workbook, passed to ``gc.open_by_url``.
        title: Title of the worksheet to export.
        gc: Sheets client exposing ``open_by_url``.
        s3: S3 client exposing ``put_object``.
        connection: Database connection handed to ``upload_to_redshift``.

    Returns:
        None. Any exception is caught and its traceback is printed.
    """

    try:
        workbook = gc.open_by_url(link)
        sheet = workbook.worksheet(title)
        rows = sheet.get_all_values()

        # NOTE(review): the raw sheet title is used for both the S3 key and
        # (via upload_to_redshift) the table name, so titles containing
        # spaces or punctuation end up unquoted in the generated SQL.
        key = '{}.csv'.format(sheet.title)

        file_headers = []
        data_fields = []
        is_string = []

        # Header row: split "name[type]" into the column name and its type.
        for column in (rows[0] if rows else []):
            name, data_type = column.split('[')
            data_type = data_type[:-1]  # drop the closing ']'
            file_headers.append(name)
            if data_type.lower() == 'string':
                # Start at 1 so a column of empty cells never yields VARCHAR(0).
                data_fields.append(['VARCHAR', 1])
                is_string.append(True)
            else:
                data_fields.append(data_type)
                is_string.append(False)

        # Data rows: strip characters that would break the naive CSV and
        # track the widest value of each string column.
        lines = []
        for row in rows[1:]:
            cleaned = []
            for counter, cell in enumerate(row):
                value = cell.replace('\'','').replace('"','').replace(',','')
                if is_string[counter]:
                    # Redshift VARCHAR lengths are in bytes, not characters,
                    # so measure the UTF-8 encoding of the value.
                    width = len(value.encode('utf-8'))
                    if width > data_fields[counter][1]:
                        data_fields[counter][1] = width
                cleaned.append(value)
            lines.append(','.join(cleaned))

        # Upload straight from memory; no trailing newline after the last row.
        s3.put_object(Bucket='google-sheets-upload', Key=key, Body='\n'.join(lines))
        print('WROTE {} TO S3'.format(key))

        # Kick off the copy into Redshift.
        upload_to_redshift(connection, key, file_headers, data_fields)
    except Exception:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still stop the worker process.
        message = '`{}` failed due to errors: \n```{}```'.format(title,traceback.format_exc())
        print(message)
        #send_slack_message(message)

def upload_to_redshift(connection,file,file_info,data_info):
    """Recreate a ``dx_csv`` table and COPY a CSV from S3 into it.

    Args:
        connection: Database connection exposing ``cursor()``, ``commit()``
            and ``rollback()``.
        file: S3 object key such as ``'people.csv'``. The text before the
            first ``.`` is used as the table name.
        file_info: Column names, in order.
        data_info: Column types parallel to ``file_info``. Each entry is
            either a type string, or a two-item list ``[type, length]``
            rendered as ``type(length)``.

    Returns:
        None. On any exception the transaction is rolled back and the
        traceback is sent through ``send_slack_message``; nothing is
        re-raised.
    """

    try:
        # Build the "(col type, col type(len), ...)" column list.
        column_defs = []
        for name, data_type in zip(file_info, data_info):
            if isinstance(data_type, list):
                column_defs.append('{} {}({})'.format(name, data_type[0], data_type[1]))
            else:
                column_defs.append('{} {}'.format(name, data_type))
        columns = '({})'.format(', '.join(column_defs))

        # NOTE(review): the table name and column list are interpolated
        # into the SQL unquoted, so they must be trusted identifiers.
        table = file.split('.')[0]
        sql_script = 'DROP TABLE  IF EXISTS dx_csv.{};\n CREATE TABLE dx_csv.{} {}; \nCOPY dx_csv.{} from \'s3://google-sheets-upload/{}\'\nDELIMITER \',\'\niam_role \'arn:aws:iam::838590096234:role/dx-data-s3-access\';'
        sql_command = sql_script.format(table, table, columns, table, file)

        cursor = connection.cursor()
        try:
            # Drop, create and copy run as one transaction.
            print('Running')
            cursor.execute(sql_command)
            connection.commit()
        finally:
            cursor.close()
        print('UPLOADED {}'.format(file))
    except Exception:
        # Capture the traceback before attempting any cleanup.
        error_text = traceback.format_exc()
        try:
            # Undo a half-applied DROP/CREATE.
            connection.rollback()
        except Exception:
            # Best effort: the connection itself may be what failed.
            pass
        send_slack_message('`{}` failed due to errors: \n```{}```'.format(file,error_text))

def send_slack_message(message):
    """POST ``message`` as a JSON payload to the Slack webhook URL.

    Args:
        message: Text of the Slack message; sent with ``mrkdwn`` and
            ``link_names`` enabled.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when the timeout expires.
    """
    slack_url = 'https://slack_webhook_url'
    payload = {"text": message, "link_names": 1, "mrkdwn": True}
    requests.post(
        slack_url,
        data=json.dumps(payload),
        # The HTTP header name is hyphenated; 'Content_Type' is not a
        # header that servers recognise.
        headers={'Content-Type': 'application/json'},
        # Without a timeout a stalled webhook would block the worker forever.
        timeout=10,
    )

def update_permissions():
    """Grant read access on the ``dx_csv`` schema to the reader groups.

    Runs ``GRANT USAGE`` on the schema and ``GRANT SELECT`` on all of its
    tables for the ``readonly``, ``analyst`` and ``aggregate_only`` groups
    in one statement batch, then commits.
    """
    grant_template = (
        'GRANT USAGE ON SCHEMA dx_csv TO GROUP {group};\n'
        'GRANT SELECT ON ALL TABLES IN SCHEMA dx_csv TO GROUP {group};'
    )
    reader_groups = ('readonly', 'analyst', 'aggregate_only')
    grant_script = '\n'.join(
        grant_template.format(group=group) for group in reader_groups
    )

    db_connection = s.getConnection()
    db_cursor = db_connection.cursor()
    db_cursor.execute(grant_script)
    db_connection.commit()



if __name__ == '__main__':

    start = time.time()

    # The authorized gspread client comes from the local `s` module. (It was
    # previously built inline from a service-account JSON key file.)
    gc = s.getGC()

    # S3 client; not used further in this block, since each worker process
    # creates its own.
    s3 = boto3.client('s3')

    # Master spreadsheet listing every workbook link to scrape.
    master_workbook = gc.open_by_url('https://docs.google.com/spreadsheets/d/1qBYVPgYaRNZGuzQ9W2uSW1CPiQBLVISvFQPhf1Ft61Y/edit#gid=0')
    input_rows = master_workbook.worksheet('INPUT HERE').get_all_values()

    # Map each link (first column) to the non-blank sheet names listed after
    # it. The first row is a header and is skipped; rows that name no sheets
    # are ignored.
    link_sheets = defaultdict(list)
    for row in input_rows[1:]:
        sheet_names = [cell for cell in row[1:] if cell.strip()]
        if sheet_names:
            link_sheets[row[0]].extend(sheet_names)

    # One worker process per (link, sheet) pair; start them all, then wait.
    processes = [
        Process(target=kickoff_process, args=(link, sheet))
        for link, sheets in link_sheets.items()
        for sheet in sheets
    ]
    for worker in processes:
        worker.start()
    for worker in processes:
        worker.join()

    print('Updating Permissions')
    update_permissions()
    print(time.time()-start)
