# -*- coding: utf-8 -*-
"""
Created on Wed Dec  9 17:28:39 2015

Collection of routines for Hydrangea post-processing and analysis
[*: self-contained]

ReadRegion: class for reading in a small sub-region of a Hydrangea output.
clone_dir: clone a directory name into a side-branch.
*snap_times: return the times of all Hydrangea outputs (default: snapshots).
get_snepshot_indices: read and return the snepshot indices for one simulation
"""

import h5py as h5
import sys
from bisect import bisect_right
from pdb import set_trace
from operator import mul
from functools import reduce
import numpy as np
import time
from astropy.io import ascii
import os
import sim_tools as st
from astropy.cosmology import Planck13
import astropy.units as u
import yb_utils as yb

# Absolute directory containing this module (symlinks resolved), with a
# trailing slash appended.
currDir = os.path.dirname(os.path.realpath(__file__)) + "/"

# ----------- READ_REGION -------------------

class ReadRegion:
    """
    Set up a region for efficient reading of data from snapshot files.
    
    This class can be called with several parameters to allow easy setup
    of commonly encountered selection regions (sphere, cube, box). Internally,
    the particle map generated by MapMaker is then read and processed into a 
    (typically small) number of segments to read in.

    Args:
        filename (string): Path of the snapshot file to read data from. For
            multi-file reads, the second-to-last '.'-separated part of the
            name is replaced by the file index.
        parttype (int): Particle type index; the HDF5 group used is 
            'PartType' + str(parttype).
        coordinates (sequence): Geometry of the selection region:
            sphere -- centre (3) and radius (1);
            cube   -- lower corner (3) and side length (1);
            box    -- lower corner (3) and upper corner (3).
        shape (string): 'sphere' (default), 'cube', or 'box'. A capitalised
            first letter is also accepted.
        verbose (bool): Enable additional log messages (default: False).
        exact (bool): If True, additionally build the list of particles
            lying exactly inside the selection region (default: False).
        astro (bool): Accepted for backwards compatibility; not used here.
        pmfile (string): File whose directory holds 'ParticleMap.hdf5'.
            By default (None), 'filename' is used.
        periodic (bool): If True, cell indices outside the map are wrapped
            around; if False (default), such cells are skipped.
        load_full (bool): If True, do not use the particle map at all and
            read complete datasets instead (default: False).

    If the particle map does not exist, or more than 50000 cells would have
    to be checked, the region silently falls back to 'load_full' behaviour.
    """

    def __init__(self, filename, parttype, coordinates, shape = None, verbose=False, exact=False, astro = False, pmfile = None, periodic = False, load_full = False):

        stime = time.time()

        self.pmfile = filename if pmfile is None else pmfile
        self.filename = filename
        self.parttype = parttype
        self.PTName = "PartType" + str(parttype)
        self.coordinates = coordinates
        self.exact = exact
        self.periodic = bool(periodic)

        if shape is None:
            shape = "sphere"

        # Deal with capital cases, and set up centres for use later.
        # NB: for 'cube' and 'box', the 'centre' is really the lower corner.
        self.centre = None
        if shape in ('Sphere', 'Box', 'Cube'):
            shape = shape.lower()
        if shape in ('sphere', 'box', 'cube'):
            self.centre = coordinates[0:3]
        self.shape = shape

        # Defaults, so that the instance is in a consistent (empty) state
        # on every exit path of the constructor.
        self.loadFull = False
        self.NumCells = 0
        self.NumSegments = 0
        self.NumParticles = 0
        self.NumParticlesExact = 0
        self.Files, self.Offsets, self.Lengths = [], [], []
        self.ind_sel = None

        WorkDir = yb.file_to_dir(self.pmfile)
        MapFile = WorkDir + "/ParticleMap.hdf5"

        # Without a particle map we can only read everything
        if load_full or not os.path.exists(MapFile):
            self.loadFull = True
            self.NumSegments = np.inf
            return

        # Select rectangular region of simulation to be loaded
        box = self._make_selection_box()
        if verbose:
            print("Selection box is", box)

        # The map file is only needed during setup, so close it afterwards
        with h5.File(MapFile, 'r') as f:

            # Identify cells lying in region of interest
            CellOffsets, CellLength = self._identify_relevant_cells(
                box, f, verbose=verbose)

            self.NumCells = reduce(mul, CellLength, 1)
            print("Checking %d cells..." % self.NumCells)

            if self.NumCells == 0:
                # Empty region: read_data() will return None
                return

            if self.NumCells > 50000:
                # Too many cells to check; cheaper to just read everything
                self.loadFull = True
                self.NumSegments = np.inf
                return

            # Find the File Index, Offset, and Length for all Segments
            self.Files, self.Offsets, self.Lengths = self._find_segments(
                CellOffsets, CellLength, f, verbose=verbose)

        print("Region setup took {:.3f} sec." .format(time.time()-stime))

        # Experimental section: make list of indices lying EXACTLY in 
        # selection region
        if exact:
            if self.centre is None:
                print("Your shape '" + self.shape + "' is not yet implemented. Sorry.")
                sys.exit(1)

            coords = None
            if self.NumSegments > 0:
                # Need to explicitly set 'exact = False' here, because we 
                # have not yet set up exact loading (we are doing it now!)
                coords = self.read_data("Coordinates", exact = False)

            if coords is None:
                self.ind_sel = np.zeros(0, dtype=int)
            else:
                self.ind_sel = self._exact_indices(coords)

            self.NumParticlesExact = len(self.ind_sel)

        # End of special section for 'exact' keyword

        print("Selection region contains %d cells, %d segments, and %d particles, spread over %d files" % (self.NumCells, self.NumSegments, self.NumParticles, len(np.unique(self.Files))))
        if verbose:
            print("  (selected files:", np.unique(self.Files), ")")


    def _exact_indices(self, coords):
        """
        Find the particles lying exactly within the selection region.

        Args:
            coords (ndarray, [N, 3]): particle coordinates, in the same
                order in which read_data() returns them.

        Returns:
            ndarray (int): indices into 'coords' of particles in the region.

        NOTE(review): periodic wrapping is not applied to the coordinates
        here, so wrapped-around particles are not selected -- confirm
        whether this matters for callers using periodic=True with exact=True.
        """

        relpos = coords - np.array(self.centre)[None, :]

        if self.shape == 'sphere':
            relrad = np.linalg.norm(relpos, axis=1)
            return np.nonzero(relrad <= self.coordinates[3])[0]

        if self.shape == 'cube':
            extent = np.full(3, self.coordinates[3])
        elif self.shape == 'box':
            extent = (np.array(self.coordinates[3:6])
                      - np.array(self.coordinates[0:3]))
        else:
            print("Your shape '" + self.shape + "' is not understood.")
            sys.exit(1)

        # 'relpos' is measured from the LOWER corner, so both bounds must be
        # tested (loaded cells generally extend below the corner, too).
        inside = np.all((relpos >= 0) & (relpos <= extent[None, :]), axis=1)
        return np.nonzero(inside)[0]


    # Small convenience function to update base filename during reading  
    def _swap_filename(self, baseName, number):
        """Return 'baseName' with its second-to-last '.'-part set to 'number'."""
        nameParts = baseName.split('.')
        nameParts[-2] = str(number)
        return '.'.join(nameParts)


    @staticmethod
    def _astro_conversion(f, DSet):
        """
        Compute the code --> astronomical conversion factor of a dataset.

        Args:
            f: open HDF5 file, which must contain a '/Header' group.
            DSet: the dataset whose scaling exponents are to be used.

        Returns:
            conv_astro, aexp: the conversion factor and expansion factor.
        """
        hscale_exponent = DSet.attrs["h-scale-exponent"]
        ascale_exponent = DSet.attrs["aexp-scale-exponent"]

        header = f["/Header"]
        aexp = header.attrs["ExpansionFactor"]
        h_hubble = header.attrs["HubbleParam"]
        conv_astro = aexp**ascale_exponent * h_hubble**hscale_exponent
        return conv_astro, aexp


    def read_data(self, dataSetName, astro = False, verbose = False, return_conv = False, exact=None, filename = None, PTName = None, singleFile = False):

        """
        Reads a specified dataset for a previously set up region.

        Args:
            dataSetName (string): The dataset to read from, including groups
                where appropriate. The leading 'PartType[x]' must *not* be 
                included!
            astro (bool): If True, convert values to proper astronomical units
                (default: False)
            verbose (bool): Enable additional log messages (default: False)
            return_conv (bool): If True, returns a list of [data, conv_astro,
                aexp], where conv_astro = conversion internal --> astro.
                (default: False)
            exact (bool): Only return data for particles lying in the exact
                specified selection region. If None (default), the value
                used to set up the ReadRegion is used.
            filename (string): Specifies an alternative path to read data from.
                This is useful for reading data from ancillary catalogues.
                By default (None), the file passed to ReadRegion is used.
            PTName (string): Specifies an alternative particle-type group name.
                By default (None), 'PartType[x]' is used.
            singleFile (bool): Set to True to only read from a single file.
                This is useful for ancillary catalogues, default: False.

        Returns:
            The data array, or (data, conv_astro, aexp) if return_conv is
            set. Without 'astro', conv_astro and aexp are both -100. If the
            region contains no particles, None (or [None, None, None] with 
            return_conv) is returned instead.

            NOTE(review): in full-load mode with singleFile=False, the result
            is passed through from sim_tools.eagleread(); it is assumed that
            this returns [data, conv_astro, aexp] when astro=True -- confirm.
        """                    

        if filename is None:
            filename = self.filename

        if PTName is None:
            PTName = self.PTName

        stime = time.time()

        if self.NumSegments == 0:
            return [None, None, None] if return_conv else None

        if self.loadFull:
            if not singleFile:
                data_full = st.eagleread(
                    filename, PTName + '/' + dataSetName, astro = astro)
                if astro and not return_conv:
                    data_full = data_full[0]
                return data_full

            data_full = yb.read_hdf5(filename, PTName + '/' + dataSetName)
            conv_astro, aexp = -100, -100
            if astro:
                with h5.File(filename, 'r') as f:
                    conv_astro, aexp = self._astro_conversion(
                        f, f[PTName + '/' + dataSetName])
                data_full *= conv_astro

            if return_conv:
                return data_full, conv_astro, aexp
            return data_full

        if exact is None:
            exact = self.exact

        data_full = None
        conv_astro, aexp = -100, -100
        haveConv = False
        Counter = 0

        # Keep one file open at a time, and only re-open when the file 
        # actually changes between consecutive segments.
        f = None
        OpenFileName = None
        try:
            for iiseg in range(self.NumSegments):
                if singleFile:
                    CurrFileName = filename
                else:
                    CurrFileName = self._swap_filename(
                        filename, self.Files[iiseg])

                if verbose:
                    print("CurrFileName = '" + CurrFileName +"'")

                if CurrFileName != OpenFileName:
                    if f is not None:
                        f.close()
                        f = None
                    f = h5.File(CurrFileName, 'r')
                    OpenFileName = CurrFileName

                if PTName != "":
                    if PTName not in f:
                        # NOTE(review): the output slots of this segment are
                        # not filled (and 'Counter' is not advanced), as 
                        # before -- confirm that this is intended.
                        continue
                    DSet = f[PTName][dataSetName]
                else:
                    DSet = f[dataSetName]

                CurrFirst = self.Offsets[iiseg]
                CurrLength = self.Lengths[iiseg]

                # Additional bit (added 4-Apr-19):
                # Modify these values to read from single file instead
                if singleFile:
                    CurrFirst += int(self.FileOffsets[self.Files[iiseg]])
                CurrBeyLast = CurrFirst + CurrLength

                # Allocate output on first *successfully opened* dataset
                if data_full is None:
                    full_shape = list(DSet.shape)
                    full_shape[0] = self.NumParticles
                    data_full = np.empty(full_shape, DSet.dtype)

                # Conversion factors must be fetched while the file is open
                if astro and not haveConv:
                    conv_astro, aexp = self._astro_conversion(f, DSet)
                    haveConv = True

                # Slicing the first axis works for any dataset rank
                data_full[Counter:Counter+CurrLength] = DSet[CurrFirst:CurrBeyLast]
                Counter += CurrLength
        finally:
            if f is not None:
                f.close()

        if data_full is None:
            # None of the files contained the requested group
            return [None, None, None] if return_conv else None

        # Limit selection if 'exact' is set:
        if exact:
            if getattr(self, 'ind_sel', None) is None:
                raise AttributeError(
                    "Exact reading requires the ReadRegion to be set up "
                    "with 'exact = True'.")
            data_full = data_full[self.ind_sel]

        if astro:
            data_full *= conv_astro    

        print("Reading '" + dataSetName + "' took {:.3f} sec." .format(time.time()-stime))

        if return_conv:
            return data_full, conv_astro, aexp
        return data_full                    


    def total_in_region(self, dataSetName, weightquant = False, astro = False):
        """
        Convenience function to compute the total or average of 'quantity' 

        Args:
            dataSetName (string): The dataset to sum or average.
            weightquant: False (default) to return the sum, None to return 
                the unweighted mean, or the name of a dataset to use as 
                weights for a weighted average.
            astro (bool): Passed on to read_data() (default: False).

        Returns:
            The sum / mean / weighted average along the particle axis, or
            None if the region contains no data.
        """
        
        if self.exact is False:

            print("")
            print("******************************************************************")
            print("***************************   WARNING ****************************")
            print("******************************************************************")
            print("")
            print("You have not set the 'exact' switch when establising this region.")
            print("Be aware that the reported total may include a contribution from")
            print("particles outside the target region. Proceed with caution...")            
            print("")
            print("******************************************************************")
            print("")
            
        data = self.read_data(dataSetName, astro = astro)
        if data is None:
            return None

        if weightquant is None:
            return np.mean(data, axis=0)
        if not weightquant:
            return np.sum(data, axis=0)

        weights = self.read_data(weightquant, astro = astro)
        return np.average(data, weights = weights, axis=0)


    def _make_selection_box(self):
        """
        Find the box enclosing the selection region.
        
        For the moment, *all* particles in this region
        will be loaded. In future, we may do something more fancy
        that takes the actual selection shape into account...

        Returns:
            [lower, upper]: two 3-element lists with the box corners.

        Exits the program (status 1) if the shape is unknown, or if too few
        coordinates were supplied for it.
        """

        coords = self.coordinates
        shape = self.shape

        if shape == "sphere" or shape == "Sphere":
            if len(coords) < 4:
                print("A sphere needs four coordinates: its centre (3), and radius (1)")
                sys.exit(1)                
                
            rad = coords[3]
            return [[coords[0]-rad, coords[1]-rad, coords[2]-rad],
                    [coords[0]+rad, coords[1]+rad, coords[2]+rad]]
                   
        if shape == "cube" or shape == "Cube":
            if len(coords) < 4:
                print("A cube needs four coordinates: its lower corner (3) and side-length (1)")
                sys.exit(1)                

            side = coords[3]
            return [coords[0:3], 
                    [coords[0]+side, coords[1]+side, coords[2]+side]]
            
        if shape == "box" or shape == "Box":
            if len(coords) < 6:
                print("A box needs six coordinates: its lower corner (3) and upper corner (3)")
                sys.exit(1)

            return [[coords[0], coords[1], coords[2]],
                    [coords[3], coords[4], coords[5]]]

        print("Your shape '" + shape + "' is not yet implemented.")
        sys.exit(1)


    def _identify_relevant_cells(self, box, f, verbose=False):
        """
        Identify all cells intersecting the selection box.

        Args:
            box: [lower, upper] corners, as from _make_selection_box().
            f: the open particle map file.
            verbose (bool): Enable additional log messages.

        Returns:
            CellOffsets, CellLengths: index of the first cell, and number of
            cells, along each of the three axes. Offsets may be negative or
            extend beyond the map; this is dealt with in _find_segments().
        """
        
        Header = f["Header"]
        NumPartTotal = Header.attrs["NumPart_Total"][self.parttype]
        if NumPartTotal == 0:
            return [0,0,0], [0,0,0]
        
        MainGroup = f[self.PTName]
        CellMins = MainGroup.attrs["CellRegionCorner"]
        CellSize = MainGroup.attrs["CellSize"][0]

        if verbose:
            print("CellMins =", CellMins)
            print("CellSize =", CellSize)

        # Use floor rather than int(): the latter truncates towards zero and
        # would assign positions just below the map corner to cell 0 
        # instead of cell -1.
        CellOffsets = [int(np.floor((box[0][idim]-CellMins[idim])/CellSize))
                       for idim in range(3)]
        CellLengths = [int(np.floor((box[1][idim]-CellMins[idim])/CellSize))
                       - CellOffsets[idim] + 1 for idim in range(3)]

        if verbose:
            print("CellOffsets =", CellOffsets)
            print("CellLengths =", CellLengths)
        
        return CellOffsets, CellLengths                


    def _find_segments(self, CellOffsets, CellLengths, f, verbose=False):
        """
        Determine the 'segments' that have to be loaded.
        
        A segment is a section of a cell lying entirely in one file.

        Args:
            CellOffsets, CellLengths: as from _identify_relevant_cells().
            f: the open particle map file.
            verbose (bool): Enable additional log messages.

        Returns:
            Files, Offsets, Lengths: for each segment, the index of the file
            it lies in, the offset of its first particle within that file, 
            and its number of particles.

        Also sets self.FileOffsets, self.NumParticles, 
        self.NumParticlesExact, and self.NumSegments.

        NOTE(review): 'FileOffset' is presumably the global index of the 
        first particle of each file, with the total particle number as an 
        extra last entry -- confirm. If that last entry is missing, the last
        file is taken to extend to the end of the cell.
        """
        
        MainGroup = f[self.PTName]
        CellCount  = MainGroup["CellCount"]
        CellOffset = MainGroup["CellOffset"]            
        self.FileOffsets = MainGroup["FileOffset"][:]
        FileOffset = self.FileOffsets
        NumCellsPerDim = MainGroup.attrs["NumCellsPerDim"]

        if verbose:
            print("NumCellsPerDim = ", NumCellsPerDim)

        NumX, NumY, NumZ = (int(NumCellsPerDim[idim]) for idim in range(3))
        NumCellsTotal = CellCount.shape[0]
        NumFileOffsets = len(FileOffset)

        Files, Offsets, Lengths = [], [], []
        
        CountCheck = 0
        FullCheck = 0
        for cz in range(CellOffsets[2], CellOffsets[2]+CellLengths[2]):
            for cy in range(CellOffsets[1], CellOffsets[1]+CellLengths[1]):
                for cx in range(CellOffsets[0], CellOffsets[0]+CellLengths[0]):

                    CountCheck += 1
                    if (CountCheck % 10000 == 0):
                        print("Checking cell %d (occupied so far: %d)" % (CountCheck, FullCheck))

                    if self.periodic:
                        cxx, cyy, czz = cx % NumX, cy % NumY, cz % NumZ
                    elif (0 <= cx < NumX and 0 <= cy < NumY 
                          and 0 <= cz < NumZ):
                        cxx, cyy, czz = cx, cy, cz
                    else:
                        # Outside the map. Must be tested per axis: the flat
                        # index alone may alias onto a different valid cell.
                        continue

                    index = cxx + cyy*NumX + czz*NumX*NumY
                    if index < 0 or index >= NumCellsTotal:
                        continue

                    # Convert to Python ints, to avoid unsigned / mixed-type 
                    # arithmetic surprises in the subtractions below.
                    count = int(CellCount[index])
                    if count == 0:
                        continue

                    FullCheck += 1
                    firstElem = int(CellOffset[index])
                    endElem = firstElem + count    # One beyond the last
                  
                    File = bisect_right(FileOffset, firstElem)-1 

                    # Split the cell into one piece per file that it covers
                    segStart = firstElem
                    while segStart < endElem:
                        fileStart = int(FileOffset[File])
                        if File + 1 < NumFileOffsets:
                            fileEnd = int(FileOffset[File+1])
                        else:
                            fileEnd = endElem
                        segEnd = min(endElem, fileEnd)

                        # (Empty files in between give empty pieces: skip)
                        if segEnd > segStart:
                            Files.append(File)
                            Offsets.append(segStart - fileStart)
                            Lengths.append(segEnd - segStart)
                            segStart = segEnd

                        File += 1

        self.NumParticles = int(np.sum(Lengths))
        self.NumParticlesExact = self.NumParticles
        self.NumSegments = len(Files)

        if verbose:
            print("Checked %d cells." % CountCheck)                  

        return Files, Offsets, Lengths                             


def clone_dir(dir, loc = 'freya'):
    """
    Construct a 'clone' of a specified directory structure.

    This is used to enable storing experimental/development/specialized outputs
    in separate locations from the 'main' simulation repository, but with an
    exactly analogous internal structure.

    Everything after the first path component called 'Hydrangea' is appended
    to the root directory of the requested branch.

    Args:
        dir (string): The full path of the directory to clone.
        loc (string, optional): The branch to clone the directory to. Can be
            one of 'freya' or 'virgo'.
    
    Returns:
        string: modified directory name in the specified branch.

    Raises:
        ValueError: if 'loc' is not one of the known branches.

    Exits the program with status 44 if 'dir' contains no component called
    'Hydrangea'.
    """

    branch_roots = {
        'freya': "/freya/ptmp/mpa/ybahe/HYDRANGEA/ANALYSIS/",
        'virgo': "/virgo/scratch/ybahe/HYDRANGEA/ANALYSIS/",
    }

    dir_parts = dir.split('/')

    if 'Hydrangea' not in dir_parts:
        print("The input path '" + dir + "' does not seem to contain a directory called 'Hydrangea'...")
        sys.exit(44)

    if loc not in branch_roots:
        # Raise instead of dropping into the debugger, which would hang
        # non-interactive jobs and then silently return None.
        raise ValueError(
            "Do not understand requested redirect site '" + str(loc) + "'")

    first_special = dir_parts.index('Hydrangea') + 1
    special_part = '/'.join(dir_parts[first_special:])

    return branch_roots[loc] + special_part

def get_snepshot_indices(rundir, list='basic'):
    """
    Read the snepshot list '<rundir>/sneplists/<list>.dat'.

    Args:
        rundir (string): base directory of the simulation run.
        list (string): name of the snepshot list to read (default: 'basic').

    Returns:
        Tuple of four arrays, holding the columns 'rootIndex', 'aexp', 
        'sourceType', and 'sourceNum' of the list, in this order.
    """

    table = ascii.read("{}/sneplists/{}.dat".format(rundir, list))

    columns = ('rootIndex', 'aexp', 'sourceType', 'sourceNum')
    return tuple(np.array(table[name]) for name in columns)
    

def snap_times(conv = None, list = None):
    """
    Return the times of all Hydrangea snapshots.

    By default, the expansion factors listed in the output file
    'hydrangea_snapshots_plus.dat' are returned.
    Optionally, two arguments can be provided:

    Args:
        conv (string, 'zred' or 'age'): convert the expansion factors to 
            redshift or age of the Universe (as appropriate).
        list (string): name (without directories) of the output file to read.

    Returns:
        ndarray: expansion factors, redshifts (conv='zred'), or ages of the
        Universe in Gyr for the Planck13 cosmology (conv='age').

    Raises:
        ValueError: if 'conv' is not None, 'zred', or 'age'.
    """

    # Validate up front (before any file access), and raise instead of 
    # dropping into the debugger, which would hang non-interactive jobs.
    if conv not in (None, 'zred', 'age'):
        raise ValueError(
            "I do not know what you mean by '" + str(conv) + "'...")

    if list is None:
        list_name = 'hydrangea_snapshots_plus'
    else:
        list_name = list
    snaptimes_file = currDir + '/hydrangea/OutputLists/' + list_name + '.dat'

    # First column of the (header-less) output list: expansion factors
    aexp = np.array(ascii.read(snaptimes_file, format = 'no_header')['col1'])

    if conv is None:
        return aexp

    zred = 1/aexp - 1
    if conv == 'zred':
        return zred

    return Planck13.age(zred).to(u.Gyr).value


# -------------------------------------------------------------------------
# -----------------  Obsolete routines ------------------------------------
# -------------------------------------------------------------------------


def get_time(inFile, convert_to = None):
    """
    Obsolete, should use sim_tools.snap_age() instead.

    Read the 'Time' attribute from the 'Header' of an HDF5 file. 
    NOTE(review): the value is treated as an expansion factor in the 
    conversions below -- confirm for the files this is used with.

    Args:
        inFile (string): path of the HDF5 file to read.
        convert_to (string): None (default) to return the value as read, 
            'zred' to convert it to redshift, or 'time' to convert it to 
            the age of the Universe in Gyr (Planck13 cosmology).

    Raises:
        ValueError: if 'inFile' is not an HDF5 file, or 'convert_to' is not
            one of the options above.
    """

    if not h5.is_hdf5(inFile):
        raise ValueError('Input file "' + inFile + '" is not HDF5!')

    # Validate before reading; the original error branch referenced an 
    # undefined name ('conv') and so failed with a NameError.
    if convert_to not in (None, 'zred', 'time'):
        raise ValueError(
            "I do not know what you mean by '" + str(convert_to) + "'...")

    aexp = yb.read_hdf5_attribute(inFile, 'Header', 'Time')[0]

    if convert_to is None:
        return aexp

    zred = 1/aexp - 1
    if convert_to == 'zred':
        return zred

    return Planck13.age(zred).to(u.Gyr).value