# -*- coding: utf-8 -*-
"""
Created on Wed Dec 9 17:28:39 2015

Collection of routines for Hydrangea post-processing and analysis
[*: self-contained]

ReadRegion: class for reading in a small sub-region of a Hydrangea output.
clone_dir: clone a directory name into a side-branch.
*snap_times: return the times of all Hydrangea outputs (default: snapshots).
get_snepshot_indices: read and return the snepshot indices for one simulation
"""

import h5py as h5
import sys
from bisect import bisect_right
from pdb import set_trace
from operator import mul
from functools import reduce
import numpy as np
import time
from astropy.io import ascii
import os
import sim_tools as st
from astropy.cosmology import Planck13
import astropy.units as u
import yb_utils as yb

# Directory containing this module (with trailing slash); used to locate
# the bundled output lists in snap_times().
currDir = os.path.dirname(os.path.realpath(__file__)) + "/"


# ----------- READ_REGION -------------------

class ReadRegion:
    """
    Set up a region for efficient reading of data from snapshot files.

    This class can be called with several parameters to allow easy setup
    of commonly encountered selection regions (sphere, cube, box).
    Internally, the particle map generated by MapMaker is then read and
    processed into a (typically small) number of segments to read in.

    If no particle map ('ParticleMap.hdf5') is found next to `pmfile`,
    if `load_full` is set, or if more than 50000 cells would have to be
    checked, the region falls back to reading the full dataset
    (`loadFull` = True, `NumSegments` = inf).
    """

    def __init__(self, filename, parttype, coordinates, shape = None,
                 verbose=False, exact=False, astro = False, pmfile = None,
                 periodic = False, load_full = False):
        """
        Args:
            filename (string): One of the snapshot files to read from.
            parttype (int): Particle type index; the HDF5 group read is
                'PartType' + str(parttype).
            coordinates (sequence): Region definition. Sphere: centre (3)
                and radius (1). Cube: lower corner (3) and side length (1).
                Box: lower corner (3) and upper corner (3).
            shape (string): 'sphere' (default), 'cube', or 'box'
                (capitalised first letter also accepted).
            verbose (bool): Enable additional log messages.
            exact (bool): If True, build an index list (`ind_sel`) of the
                particles lying exactly within the selection region.
            astro (bool): Accepted for backwards compatibility; not used
                during setup.
            pmfile (string): File whose directory holds 'ParticleMap.hdf5'
                (default: `filename`).
            periodic (bool): Wrap cell indices periodically.
            load_full (bool): Skip the particle map and always read the
                full dataset.
        """

        # time.clock() was removed in Python 3.8
        stime = time.perf_counter()

        self.pmfile = filename if pmfile is None else pmfile
        self.filename = filename
        self.parttype = parttype
        self.PTName = "PartType" + str(parttype)
        self.coordinates = coordinates

        if shape is None:
            shape = "sphere"

        self.exact = exact
        self.periodic = bool(periodic)

        # Deal with capital cases, and set up centres for use later.
        # NB: for 'cube' and 'box' the "centre" is really the lower corner,
        # consistent with _make_selection_box().
        self.centre = None
        if shape in ('Sphere', 'sphere', 'Box', 'box', 'Cube', 'cube'):
            shape = shape.lower()
            self.centre = coordinates[0:3]
        self.shape = shape

        WorkDir = yb.file_to_dir(self.pmfile)
        MapFile = WorkDir + "/ParticleMap.hdf5"

        if not os.path.exists(MapFile):
            load_full = True

        if load_full:
            self.loadFull = True
            self.NumSegments = np.inf
            return

        # The map file is only needed during setup, so make sure it is
        # closed again on every exit path.
        with h5.File(MapFile, 'r') as f:

            # Select rectangular region of simulation to be loaded
            box = self._make_selection_box()
            if verbose:
                print("Selection box is", box)

            # Identify cells lying in region of interest
            CellOffsets, CellLength = self._identify_relevant_cells(
                box, f, verbose=verbose)

            self.NumCells = reduce(mul, CellLength, 1)
            print("Checking %d cells..." % self.NumCells)

            if self.NumCells == 0:
                # Nothing to read. Leave the instance in a consistent
                # state so that read_data() returns None instead of
                # failing on missing attributes.
                self.loadFull = False
                self.Files, self.Offsets, self.Lengths = [], [], []
                self.NumSegments = 0
                self.NumParticles = 0
                self.NumParticlesExact = 0
                self.ind_sel = np.zeros(0, dtype=int)
                return

            if self.NumCells > 50000:
                self.loadFull = True
                self.NumSegments = np.inf
                return

            self.loadFull = False

            # Find the File Index, Offset, and Length for all Segments
            self.Files, self.Offsets, self.Lengths = self._find_segments(
                CellOffsets, CellLength, f, verbose=verbose)

        print("Region setup took {:.3f} sec."
              .format(time.perf_counter()-stime))

        # Experimental section: make list of indices lying EXACTLY in
        # selection region
        if exact:
            if self.centre is None:
                print("Your shape '" + self.shape
                      + "' is not yet implemented. Sorry.")
                sys.exit()

            if self.NumSegments == 0:
                # No particles in any selected cell
                self.ind_sel = np.zeros(0, dtype=int)
            else:
                # Need to explicitly set 'exact = False' here, because we
                # have not yet set up exact loading (we are doing it
                # right now!)
                coords = self.read_data("Coordinates", exact = False)
                relpos = coords - np.array(self.centre)[None, :]

                if self.shape == 'sphere':
                    relrad = np.linalg.norm(relpos, axis=1)
                    self.ind_sel = np.nonzero(
                        relrad <= self.coordinates[3])[0]
                elif self.shape in ('cube', 'box'):
                    # relpos is measured from the lower corner, so a
                    # particle is inside iff 0 <= relpos <= extent along
                    # every axis. (Testing only the upper bound would let
                    # through everything below the lower corner.)
                    if self.shape == 'cube':
                        extent = np.full(3, self.coordinates[3],
                                         dtype=float)
                    else:
                        extent = (
                            np.asarray(self.coordinates[3:6], dtype=float)
                            - np.asarray(self.coordinates[0:3],
                                         dtype=float))
                    inside = np.all(
                        (relpos >= 0) & (relpos <= extent[None, :]),
                        axis=1)
                    self.ind_sel = np.nonzero(inside)[0]
                else:
                    print("Your shape '" + self.shape
                          + "' is not understood.")
                    sys.exit()

            self.NumParticlesExact = len(self.ind_sel)

        # End of special section for 'exact' keyword

        print("Selection region contains %d cells, %d segments, and %d "
              "particles, spread over %d files"
              % (self.NumCells, self.NumSegments, self.NumParticles,
                 len(np.unique(self.Files))))

        if verbose:
            print("   (selected files:", np.unique(self.Files), ")")

    # Small convenience function to update base filename during reading
    def _swap_filename(self, baseName, number):
        """Return baseName with its second-to-last '.'-part set to number."""
        nameParts = baseName.split('.')
        nameParts[-2] = str(number)
        resName = '.'.join(nameParts)
        return resName

    @staticmethod
    def _astro_conversion(f, DSet):
        """
        Compute the code --> astro conversion factor for one dataset.

        Args:
            f: Open HDF5 file containing a '/Header' group with the
                attributes 'ExpansionFactor' and 'HubbleParam'.
            DSet: HDF5 dataset carrying the attributes 'h-scale-exponent'
                and 'aexp-scale-exponent'.

        Returns:
            tuple: (conv_astro, aexp), with
                conv_astro = aexp**ascale_exponent * h**hscale_exponent.
        """
        hscale_exponent = DSet.attrs["h-scale-exponent"]
        ascale_exponent = DSet.attrs["aexp-scale-exponent"]

        header = f["/Header"]
        aexp = header.attrs["ExpansionFactor"]
        h_hubble = header.attrs["HubbleParam"]

        conv_astro = aexp**ascale_exponent * h_hubble**hscale_exponent
        return conv_astro, aexp

    def read_data(self, dataSetName, astro = False, verbose = False,
                  return_conv = False, exact=None, filename = None,
                  PTName = None, singleFile = False):
        """
        Reads a specified dataset for a previously set up region.

        Args:
            dataSetName (string): The dataset to read from, including
                groups where appropriate. The leading 'PartType[x]' must
                *not* be included!
            astro (bool): If True, convert values to proper astronomical
                units (default: False)
            verbose (bool): Enable additional log messages
                (default: False)
            return_conv (bool): If True, returns a list of
                [data, conv_astro, aexp], where conv_astro = conversion
                internal --> astro. (default: False)
            exact (bool): Only return data for particles lying in the
                exact specified selection region. If None (default), the
                value used to set up the ReadRegion is used.
            filename (string): Specifies an alternative path to read data
                from. This is useful for reading data from ancillary
                catalogues. By default (None), the file passed to
                ReadRegion is used.
            PTName (string): Specifies an alternative particle-type group
                name. By default (None), 'PartType[x]' is used. An empty
                string reads `dataSetName` from the file root.
            singleFile (bool): Set to True to only read from a single
                file. This is useful for ancillary catalogues,
                default: False.

        Returns:
            The data array, or (data, conv_astro, aexp) if `return_conv`
            is set (conv_astro = aexp = -100 when `astro` is False).
            None (or [None, None, None] with `return_conv`) if the region
            contains no segments, or if none of the files contains the
            group `PTName`.
        """

        if filename is None:
            filename = self.filename
        if PTName is None:
            PTName = self.PTName

        stime = time.time()

        if self.NumSegments == 0:
            if return_conv:
                return [None, None, None]
            return None

        if self.loadFull:
            if singleFile:
                data_full = yb.read_hdf5(filename,
                                         PTName + '/' + dataSetName)
                if astro:
                    # The conversion attributes have to be read from the
                    # file itself.
                    with h5.File(filename, 'r') as f:
                        if PTName != "":
                            DSet = f[PTName][dataSetName]
                        else:
                            DSet = f[dataSetName]
                        conv_astro, aexp = self._astro_conversion(f, DSet)
                    data_full *= conv_astro
                    if return_conv:
                        return [data_full, conv_astro, aexp]
            else:
                # NOTE(review): st.eagleread is assumed to return a
                # sequence whose first element is the data when
                # astro=True -- confirm against sim_tools.
                data_full = st.eagleread(
                    filename, PTName + '/' + dataSetName, astro = astro)
                if astro and not return_conv:
                    data_full = data_full[0]

            return data_full

        if exact is None:
            exact = self.exact

        data_full = None      # allocated once the first dataset is open
        conv_astro = None
        aexp = None
        Counter = 0

        for iiseg in range(self.NumSegments):

            if singleFile:
                CurrFileName = filename
            else:
                CurrFileName = self._swap_filename(filename,
                                                   self.Files[iiseg])

            if verbose:
                print("CurrFileName = '" + CurrFileName +"'")

            with h5.File(CurrFileName, 'r') as f:

                if PTName != "":
                    if PTName not in f:
                        # NB: the output slots of a skipped segment stay
                        # unfilled, as in the original implementation.
                        continue
                    DSet = f[PTName][dataSetName]
                else:
                    DSet = f[dataSetName]

                CurrFirst = self.Offsets[iiseg]
                CurrLength = self.Lengths[iiseg]
                CurrBeyLast = CurrFirst + CurrLength

                # Additional bit (added 4-Apr-19):
                # Modify these values to read from single file instead
                if singleFile:
                    CurrFirst += self.FileOffsets[self.Files[iiseg]]
                    CurrBeyLast += self.FileOffsets[self.Files[iiseg]]

                # Allocate on the first segment that is actually read
                # (not necessarily segment 0, which may be skipped above).
                if data_full is None:
                    full_shape = list(DSet.shape)
                    full_shape[0] = self.NumParticles
                    data_full = np.empty(full_shape, DSet.dtype)

                # Slicing along the first axis works for 1D and N-D alike
                data_full[Counter:Counter+CurrLength] = (
                    DSet[CurrFirst:CurrBeyLast])

                # Determine code --> physical conversion factors while the
                # file is still open. Taken from the first file read.
                if astro and conv_astro is None:
                    conv_astro, aexp = self._astro_conversion(f, DSet)

                Counter += CurrLength

        if data_full is None:
            # None of the files contained the requested group
            if return_conv:
                return [None, None, None]
            return None

        # Limit selection if 'exact' is set:
        if exact:
            data_full = data_full[self.ind_sel]

        if astro:
            data_full *= conv_astro
        else:
            aexp = -100
            conv_astro = -100

        print("Reading '" + dataSetName + "' took {:.3f} sec."
              .format(time.time()-stime))

        if return_conv:
            return data_full, conv_astro, aexp
        return data_full

    def total_in_region(self, dataSetName, weightquant = False,
                        astro = False):
        """
        Convenience function to compute the total or average of 'quantity'

        Args:
            dataSetName (string): Dataset to read (see read_data).
            weightquant: False (default) returns the sum, None the
                unweighted mean; any other value is taken as the name of
                a dataset to use as weights for a weighted average.
            astro (bool): Passed through to read_data.

        Returns:
            Sum, mean, or weighted average along the first axis.
        """

        if self.exact is False:
            print("")
            print("******************************************************************")
            print("*************************** WARNING ****************************")
            print("******************************************************************")
            print("")
            print("You have not set the 'exact' switch when establising this region.")
            print("Be aware that the reported total may include a contribution from")
            print("particles outside the target region. Proceed with caution...")
            print("")
            print("******************************************************************")
            print("")

        data = self.read_data(dataSetName, astro = astro)

        if weightquant is False:
            return np.sum(data, axis=0)
        if weightquant is None:
            return np.mean(data, axis=0)

        weights = self.read_data(weightquant, astro = astro)
        return np.average(data, weights = weights, axis=0)

    def _make_selection_box(self):
        """
        Find the box enclosing the selection region.

        For the moment, *all* particles in this region will be loaded.
        In future, we may do something more fancy that takes the actual
        selection shape into account...

        Returns:
            list: [[xmin, ymin, zmin], [xmax, ymax, zmax]]
        """

        coords = self.coordinates
        shape = self.shape

        if shape == "sphere" or shape == "Sphere":
            if len(coords) < 4:
                print("A sphere needs four coordinates: its centre (3), and radius (1)")
                sys.exit(1)

            box = [[coords[0]-coords[3], coords[1]-coords[3],
                    coords[2]-coords[3]],
                   [coords[0]+coords[3], coords[1]+coords[3],
                    coords[2]+coords[3]]]

        elif shape == "cube" or shape == "Cube":
            if len(coords) < 4:
                print("A cube needs four coordinates: its lower corner (3) and side-length (1)")
                sys.exit(1)

            box = [coords[0:3],
                   [coords[0]+coords[3], coords[1]+coords[3],
                    coords[2]+coords[3]]]

        elif shape == "box" or shape == "Box":
            box = [[coords[0], coords[1], coords[2]],
                   [coords[3], coords[4], coords[5]]]

        else:
            print("Your shape '" + shape + "' is not yet implemented.")
            sys.exit(1)

        return box

    def _identify_relevant_cells(self, box, f, verbose=False):
        """
        Identify all cells intersecting the selection box.

        Args:
            box: [[lower corner], [upper corner]] of the selection box.
            f: Open particle map file.
            verbose (bool): Enable additional log messages.

        Returns:
            tuple: (CellOffsets, CellLengths), i.e. per dimension the
            index of the first cell and the number of cells to check.
            Offsets may be negative or extend beyond the grid; this is
            dealt with in _find_segments().
        """

        Header = f["Header"]
        NumPartTotal = Header.attrs["NumPart_Total"][self.parttype]

        if NumPartTotal == 0:
            return [0, 0, 0], [0, 0, 0]

        MainGroup = f[self.PTName]

        CellMins = MainGroup.attrs["CellRegionCorner"]
        CellSize = MainGroup.attrs["CellSize"][0]

        if verbose:
            print("CellMins =", CellMins)
            print("CellSize =", CellSize)

        # Use floor rather than int(): int() truncates towards zero, which
        # would map positions up to one cell below CellMins onto cell 0
        # and so drop the lowest cell layer.
        CellOffsets = [
            int(np.floor((box[0][idim]-CellMins[idim])/CellSize))
            for idim in range(3)]
        LastCells = [
            int(np.floor((box[1][idim]-CellMins[idim])/CellSize))
            for idim in range(3)]
        CellLengths = [LastCells[idim]-CellOffsets[idim]+1
                       for idim in range(3)]

        if verbose:
            print("CellOffsets =", CellOffsets)
            print("CellLengths =", CellLengths)

        return CellOffsets, CellLengths

    def _find_segments(self, CellOffsets, CellLengths, f, verbose=False):
        """
        Determine the 'segments' that have to be loaded.

        A segment is a section of a cell lying entirely in one file.

        Besides returning the segment lists, this sets `FileOffsets`,
        `NumParticles`, `NumParticlesExact`, and `NumSegments`.

        Args:
            CellOffsets, CellLengths: Output of
                _identify_relevant_cells().
            f: Open particle map file.
            verbose (bool): Enable additional log messages.

        Returns:
            tuple: (Files, Offsets, Lengths) -- for every segment the
            file index, offset of its first particle within that file,
            and number of particles.

        Raises:
            RuntimeError: If a cell's particles cannot be located in the
                file offset table.
        """

        MainGroup = f[self.PTName]

        CellCount = MainGroup["CellCount"]
        CellOffset = MainGroup["CellOffset"]
        self.FileOffsets = MainGroup["FileOffset"][:]
        FileOffset = self.FileOffsets

        NumCellsPerDim = MainGroup.attrs["NumCellsPerDim"]
        if verbose:
            print("NumCellsPerDim = ", NumCellsPerDim)

        Files, Offsets, Lengths = [], [], []

        CountCheck = 0
        FullCheck = 0
        NumTotalCells = CellCount.shape[0]

        for cz in range(CellOffsets[2], CellOffsets[2]+CellLengths[2]):
            for cy in range(CellOffsets[1], CellOffsets[1]+CellLengths[1]):
                for cx in range(CellOffsets[0],
                                CellOffsets[0]+CellLengths[0]):

                    CountCheck += 1
                    if (CountCheck % 10000 == 0):
                        print("Checking cell %d (occupied so far: %d)"
                              % (CountCheck, FullCheck))

                    if self.periodic:
                        cxx = cx % NumCellsPerDim[0]
                        cyy = cy % NumCellsPerDim[1]
                        czz = cz % NumCellsPerDim[2]
                    else:
                        cxx, cyy, czz = cx, cy, cz
                        # A coordinate outside the grid must not be
                        # folded into the flat index, where it would
                        # alias onto an unrelated cell.
                        if (cxx < 0 or cxx >= NumCellsPerDim[0] or
                                cyy < 0 or cyy >= NumCellsPerDim[1] or
                                czz < 0 or czz >= NumCellsPerDim[2]):
                            continue

                    index = (cxx + cyy*NumCellsPerDim[0]
                             + czz*NumCellsPerDim[0]*NumCellsPerDim[1])

                    if index < 0 or index >= NumTotalCells:
                        continue

                    count = CellCount[index]
                    if count == 0:
                        continue

                    FullCheck += 1

                    # Particles [firstElem, beyondLast) belong to the cell
                    firstElem = CellOffset[index]
                    beyondLast = firstElem + count

                    File = bisect_right(FileOffset, firstElem)-1
                    if File < 0:
                        raise RuntimeError(
                            "Cell %d starts before the first file offset"
                            % index)

                    # Walk through all files the cell extends over. Each
                    # segment ends at the end of the cell or the end of
                    # the current file, whichever comes first.
                    segStart = firstElem
                    while True:
                        if File + 1 >= len(FileOffset):
                            raise RuntimeError(
                                "Cell %d extends beyond the last file "
                                "offset" % index)

                        segEnd = min(beyondLast, FileOffset[File+1])

                        Files.append(File)
                        Offsets.append(segStart - FileOffset[File])
                        Lengths.append(segEnd - segStart)

                        if segEnd >= beyondLast:
                            break

                        # Cell extends across file boundaries
                        File += 1
                        segStart = FileOffset[File]

        self.NumParticles = int(np.sum(Lengths))
        self.NumParticlesExact = self.NumParticles
        self.NumSegments = len(Files)

        if verbose:
            print("Checked %d cells." % CountCheck)

        return Files, Offsets, Lengths


def clone_dir(dir, loc = 'freya'):
    """
    Construct a 'clone' of a specified directory structure.

    This is used to enable storing experimental/development/specialized
    outputs in separate locations from the 'main' simulation repository,
    but with an exactly analogous internal structure.

    Args:
        dir (string): The full path of the directory to clone.
        loc (string, optional): The branch to clone the directory to.
            Can be one of 'freya' or 'virgo'.

    Returns:
        string: modified directory name in the specified branch.

    Raises:
        ValueError: If `loc` is not one of the known branches.
    """

    dir_parts = dir.split('/')

    if 'Hydrangea' not in dir_parts:
        print("The input path '" + dir + "' does not seem to contain a directory called 'Hydrangea'...")
        sys.exit(44)

    # Everything after the first 'Hydrangea' component is carried over
    first_special = dir_parts.index('Hydrangea') + 1
    special_part = '/'.join(dir_parts[first_special:])

    if loc == 'freya':
        return "/freya/ptmp/mpa/ybahe/HYDRANGEA/ANALYSIS/" + special_part
    if loc == 'virgo':
        return "/virgo/scratch/ybahe/HYDRANGEA/ANALYSIS/" + special_part

    raise ValueError(
        "Do not understand requested redirect site '" + str(loc) + "'")


def get_snepshot_indices(rundir, list='basic'):
    """
    Extract type, number, and aexp for snepshots from a specified list.

    Args:
        rundir (string): Simulation directory containing 'sneplists/'.
        list (string): Name of the list to read from
            '<rundir>/sneplists/<list>.dat' (default: 'basic').

    Returns:
        tuple: arrays (rootIndex, aexp, sourceType, sourceNum), one per
        column of the same name in the list file.
    """

    snepdir = rundir + '/sneplists/'
    fileName = snepdir + list + '.dat'

    data = ascii.read(fileName)

    rootIndex = np.array(data['rootIndex'])
    aexp = np.array(data['aexp'])
    sourceType = np.array(data['sourceType'])
    sourceNum = np.array(data['sourceNum'])

    return rootIndex, aexp, sourceType, sourceNum


def snap_times(conv = None, list = None):
    """
    Return the times of all Hydrangea snapshots.

    By default, the expansion factors of the snapshots listed in
    'hydrangea_snapshots_plus.dat' are returned. Optionally, two
    arguments can be provided:

    Args:
        conv (string, 'zred' or 'age'): convert the expansion factors to
            redshift or age of the Universe [Gyr] (as appropriate).
        list (string): name (without directories) of the output file
            to read.

    Raises:
        ValueError: If `conv` is not None, 'zred', or 'age'.
    """

    if list is None:
        snaptimes_file = currDir + '/hydrangea/OutputLists/hydrangea_snapshots_plus.dat'
    else:
        snaptimes_file = currDir + '/hydrangea/OutputLists/' + list + '.dat'

    aexp = np.array(
        ascii.read(snaptimes_file, format = 'no_header')['col1'])

    if conv is None:
        return aexp
    if conv == 'zred':
        return 1/aexp - 1
    if conv == 'age':
        return Planck13.age(1/aexp - 1).to(u.Gyr).value

    raise ValueError(
        "I do not know what you mean by '" + str(conv) + "'...")


# -------------------------------------------------------------------------
# -----------------  Obsolete routines ------------------------------------
# -------------------------------------------------------------------------

def get_time(inFile, convert_to = None):
    """
    Obsolete, should use sim_tools.snap_age() instead.

    Read the 'Time' attribute (expansion factor) from the 'Header' group
    of an HDF5 file.

    Args:
        inFile (string): Path of the HDF5 file.
        convert_to (string): None (default) returns the expansion factor,
            'zred' the redshift, 'time' the age of the Universe [Gyr].

    Raises:
        ValueError: If `inFile` is not HDF5, or `convert_to` is not
            understood.
    """

    if not h5.is_hdf5(inFile):
        raise ValueError('Input file "' + inFile + '" is not HDF5!')

    aexp = yb.read_hdf5_attribute(inFile, 'Header', 'Time')[0]

    if convert_to is None:
        return aexp
    if convert_to == 'zred':
        return 1/aexp - 1
    if convert_to == 'time':
        return Planck13.age(1/aexp - 1).to(u.Gyr).value

    raise ValueError(
        "I do not know what you mean by '" + str(convert_to) + "'...")