# -*- coding: utf-8 -*- """ Created on Wed Dec 9 17:28:39 2015 Collection of routines for Hydrangea post-processing and analysis """ import h5py as h5 import sys from bisect import bisect_right from pdb import set_trace from operator import mul from functools import reduce import numpy as np import glob import time from astropy.io import ascii #import image_routines as imtools #from eagle_routines import rotatebyaxis import os import sim_tools as st from astropy.cosmology import Planck13 import socket import yb_utils as yb # ----------- READ_REGION ------------------- class ReadRegion: """ Set up a region for efficient reading of data from snapshot files. This class can be called with several parameters to allow easy setup of commonly encountered selection regions (sphere, cube, box). Internally, the particle map generated by MapMaker is then read and processed into a (typically small) number of segments to read in. """ def __init__(self, filename, parttype, coordinates, shape = None, verbose=False, exact=False, astro = False, pmfile = None, periodic = False, load_full = False): stime = time.clock() if pmfile is None: self.pmfile = filename else: self.pmfile = pmfile self.filename = filename self.parttype = parttype self.PTName = "PartType" + str(parttype) self.coordinates = coordinates if shape is None: shape = "sphere" self.exact = exact if periodic: self.periodic = True else: self.periodic = False # Deal with capital cases, and set up centres for use later self.centre = None if shape == 'Sphere' or shape == 'sphere': shape = 'sphere' self.centre = coordinates[0:3] if shape == 'Box' or shape == 'box': shape = 'box' self.centre = coordinates[0:3] if shape == 'Cube' or shape == 'cube': shape = 'cube' self.centre = coordinates[0:3] self.shape = shape WorkDir = yb.file_to_dir(self.pmfile) MapFile = WorkDir + "/ParticleMap.hdf5" if load_full: self.loadFull = True self.NumSegments = np.inf return else: f = h5.File(MapFile, 'r') # Select rectangular region of simulation to be loaded box = self._make_selection_box() if verbose: print("Selection box is", box) # Identify cells lying in region of interest CellOffsets, CellLength = self._identify_relevant_cells(box, f, verbose=verbose) self.NumCells = reduce(mul, CellLength, 1) print("Checking %d cells..." % self.NumCells) if self.NumCells == 0: return if self.NumCells > 50000: self.loadFull = True self.NumSegments = np.inf return else: self.loadFull = False # Find the File Index, Offset, and Length for all Segments self.Files, self.Offsets, self.Lengths = self._find_segments(CellOffsets, CellLength, f, verbose=verbose) print("Region setup took {:.3f} sec." .format(time.clock()-stime)) # Experimental section: make list of indices lying EXACTLY in selection region if exact: # Need to explicitly set 'exact = False' here, because we have not # yet set up exact loading (we are doing it right now!) coords = self.read_data("Coordinates", exact = False) if self.centre is not None: relpos = coords - np.array(self.centre)[None, :] else: print("Your shape '" + self.shape + "' is not yet implemented. Sorry.") sys.exit() if self.shape == 'sphere': relrad = np.linalg.norm(relpos,axis=1) self.ind_sel = np.nonzero(relrad <= self.coordinates[3])[0] elif self.shape == 'cube': self.ind_sel = np.nonzero((relpos[:,0] <= self.coordinates[3]) & (relpos[:,1] <= self.coordinates[3]) & (relpos[:,2] <= self.coordinates[3]))[0] elif self.shape == 'box': self.ind_sel = np.nonzero((relpos[:,0] <= (self.coordinates[3]-self.coordinates[0])) & (relpos[:,1] <= (self.coordinates[4]-self.coordinates[1])) & (relpos[:,2] <= (self.coordinates[5]-self.coordinates[2])))[0] else: print("Your shape '" + self.shape + "' is not understood.") sys.exit() self.NumParticlesExact = len(ind_sel) # End of special section for 'exact' keyword print("Selection region contains %d cells, %d segments, and %d particles, spread over %d files" % (self.NumCells, self.NumSegments, self.NumParticles, len(np.unique(self.Files)))) if verbose: print(" (selected files:", np.unique(self.Files), ")") # Small convenience function to update base filename during reading def _swap_filename(self, baseName, number): nameParts = baseName.split('.') nameParts[-2] = str(number) resName = '.'.join(nameParts) return resName def read_data(self, dataSetName, astro = False, verbose = False, return_conv = False, exact=None, filename = None, PTName = None): if filename is None: filename = self.filename if PTName is None: PTName = self.PTName """ Reads a specified dataset for a previously set up region. """ stime = time.clock() if self.NumSegments == 0: if return_conv: return [None, None, None] else: return if self.loadFull: data_full = st.eagleread(filename, PTName + '/' + dataSetName, astro = astro) if astro and not return_conv: data_full = data_full[0] return data_full if exact is None: exact = self.exact Counter = 0 for iiseg in range(self.NumSegments): CurrFileName = self._swap_filename(filename, self.Files[iiseg]) if verbose: print("CurrFileName = '" + CurrFileName +"'") f = h5.File(CurrFileName, 'r') if PTName is not "": if self.PTName not in f: continue MainGroup = f[self.PTName] DSet = MainGroup[dataSetName] else: DSet = f[dataSetName] CurrFirst = self.Offsets[iiseg] CurrLength = self.Lengths[iiseg] CurrBeyLast = CurrFirst+CurrLength if iiseg == 0: full_shape = list(DSet.shape) full_shape[0] = self.NumParticles data_full = np.empty(full_shape, DSet.dtype) if len(DSet.shape) == 1: data_full[Counter:Counter+CurrLength] = DSet[CurrFirst:CurrBeyLast] else: #print("iiseg = {:d}" .format(iiseg)) #if (iiseg == 732): # set_trace() data_full[Counter:Counter+CurrLength, :] = DSet[CurrFirst:CurrBeyLast, :] Counter += CurrLength #Limit selection if 'exact' is set: if exact: if len(data_full.shape) == 1: data_full = data_full[self.ind_sel] else: data_full = data_full[self.ind_sel, :] if astro: # Determine code --> physical conversion factors hscale_exponent = DSet.attrs["h-scale-exponent"] ascale_exponent = DSet.attrs["aexp-scale-exponent"] header = f["/Header"] aexp = header.attrs["ExpansionFactor"] h_hubble = header.attrs["HubbleParam"] conv_astro = aexp**ascale_exponent * h_hubble**hscale_exponent data_full *= conv_astro else: aexp = -100 conv_astro = -100 print("Reading '" + dataSetName + "' took {:.3f} sec." .format(time.clock()-stime)) if return_conv: return data_full, conv_astro, aexp else: return data_full def total_in_region(self, dataSetName, weightquant = False, astro = False): """ Convenience function to compute the total or average of 'quantity' """ if self.exact is False: print("") print("******************************************************************") print("*************************** WARNING ****************************") print("******************************************************************") print("") print("You have not set the 'exact' switch when establising this region.") print("Be aware that the reported total may include a contribution from") print("particles outside the target region. Proceed with caution...") print("") print("******************************************************************") print("") data = self.read_data(dataSetName, astro = astro) if weightquant == False: return np.sum(data, axis=0) elif weightquant == None: return np.mean(data, axis=0) else: weights = self.read_data(weightquant, astro = astro) return np.average(data, weights = weights, axis=0) def _make_selection_box(self): """ Find the box enclosing the selection region. For the moment, *all* particles in this region will be loaded. In future, we may do something more fancy that takes the actual selection shape into account... """ coords = self.coordinates shape = self.shape if shape == "sphere" or shape == "Sphere": if len(coords) < 4: print("A sphere needs four coordinates: its centre (3), and radius (1)") sys.exit(1) box = [[coords[0]-coords[3], coords[1]-coords[3],coords[2]-coords[3]], [coords[0]+coords[3], coords[1]+coords[3],coords[2]+coords[3]]] elif (shape == "cube" or shape == "Cube"): if len(coords) < 4: print("A cube needs four coordinates: its lower corner (3) and side-length (1)") sys.exit(1) box = [coords[0:3], [coords[0]+coords[3], coords[1]+coords[3], coords[2]+coords[3]]] elif (shape == "box" or shape == "Box"): box = [[coords[0], coords[1], coords[2]], [coords[3], coords[4], coords[5]]] else: print("Your shape '" + shape + "' is not yet implemented.") sys.exit(1) return box def _identify_relevant_cells(self, box, f, verbose=False): """ Identify all cells intersecting the selection box. """ Header = f["Header"] NumPartTotal = Header.attrs["NumPart_Total"][self.parttype] if NumPartTotal == 0: return [0,0,0], [0,0,0] MainGroup = f[self.PTName] CellMins = MainGroup.attrs["CellRegionCorner"] CellSize = MainGroup.attrs["CellSize"][0] if verbose: print("CellMins =", CellMins) print("CellSize =", CellSize) CellOffsets = [int((box[0][0]-CellMins[0])/CellSize), int((box[0][1]-CellMins[1])/CellSize), int((box[0][2]-CellMins[2])/CellSize)] CellLengths = [int((box[1][0]-CellMins[0])/CellSize)-CellOffsets[0]+1, int((box[1][1]-CellMins[1])/CellSize)-CellOffsets[1]+1, int((box[1][2]-CellMins[2])/CellSize)-CellOffsets[2]+1] if verbose: print("CellOffsets =", CellOffsets) print("CellLengths =", CellLengths) return CellOffsets, CellLengths def _find_segments(self, CellOffsets, CellLengths, f, verbose=False): """ Determine the 'segments' that have to be loaded. A segment is a section of a cell lying entirely in one file. """ MainGroup = f[self.PTName] CellCount = MainGroup["CellCount"] CellOffset = MainGroup["CellOffset"] FileOffset = MainGroup["FileOffset"][:] NumCellsPerDim = MainGroup.attrs["NumCellsPerDim"] if verbose: print("NumCellsPerDim = ", NumCellsPerDim) Files, Offsets, Lengths = [], [], [] CountCheck = 0 FullCheck = 0 for cz in range(CellOffsets[2], CellOffsets[2]+CellLengths[2]): for cy in range(CellOffsets[1], CellOffsets[1]+CellLengths[1]): for cx in range(CellOffsets[0], CellOffsets[0]+CellLengths[0]): cxx, cyy, czz = cx, cy, cz if self.periodic: if cxx < 0: cxx += NumCellsPerDim[0] elif cxx >= NumCellsPerDim[0]: cxx -= NumCellsPerDim[0] if cyy < 0: cyy += NumCellsPerDim[1] elif cyy >= NumCellsPerDim[1]: cyy -= NumCellsPerDim[1] if czz < 0: czz += NumCellsPerDim[2] elif czz >= NumCellsPerDim[2]: czz -= NumCellsPerDim[2] index = cxx + cyy*NumCellsPerDim[0] + czz*NumCellsPerDim[0]*NumCellsPerDim[1] CountCheck += 1 if (CountCheck % 10000 == 0): print("Checking cell %d (occupied so far: %d)" % (CountCheck, FullCheck)) if index < 0 or index >= CellCount.shape[0]: continue if (CellCount[index] == 0): continue FullCheck += 1 firstElem = CellOffset[index] lastElem = CellOffset[index]+CellCount[index]-1 File = bisect_right(FileOffset, firstElem)-1 CurrOffsetInFile = CellOffset[index] - FileOffset[File] if CurrOffsetInFile >= FileOffset[File+1]: set_trace() # Set default value: CurrLengthInFile = CellCount[index] # Deal with special case of multi-file cell: # Length extends to end of file if (FileOffset[File+1] <= lastElem): # Cell extends across file boundaries CurrLengthInFile = FileOffset[File+1] - firstElem Files.append(File) Offsets.append(CurrOffsetInFile) Lengths.append(CurrLengthInFile) while(FileOffset[File+1] <= lastElem): File += 1 Files.append(File) Offsets.append(0) CurrLengthInFile = FileOffset[File+1]-FileOffset[File] if (FileOffset[File+1] >= lastElem): CurrLengthInFile = lastElem-FileOffset[File] Lengths.append(CurrLengthInFile) self.NumParticles = np.sum(Lengths) self.NumParticlesExact = self.NumParticles self.NumSegments = len(Files) if verbose: print("Checked %d cells." % CountCheck) return Files, Offsets, Lengths def clone_dir(dir, loc = 'freya'): dir_parts = dir.split('/') num_Hydrangea = dir_parts.count('Hydrangea') if dir_parts.count('Hydrangea') == 0: print("The input path '" + dir + "' does not seem to contain a directory called 'Hydrangea'...") sys.exit(44) last_of_pre = dir_parts.index('Hydrangea') first_special = last_of_pre + 1 special_part = '/'.join(dir_parts[first_special:]) if loc == 'freya': return "/freya/ptmp/mpa/ybahe/HYDRANGEA/ANALYSIS/" + special_part elif loc == 'virgo': return "/virgo/scratch/ybahe/HYDRANGEA/ANALYSIS/" + special_part else: print("Do not understand requested redirect site '" + loc + "'") set_trace() def snap_times(conv = None, inputList = None): if socket.gethostname() == 'quasar': baseDir = '/data3/Hydrangea/OUTPUT_LISTS/' else: baseDir = '/net/quasar/data3/Hydrangea/OUTPUT_LISTS/' if inputList is None: snaptimes_file = baseDir + 'SnapOnly/hydrangea_snapshots_plus.dat' else: snaptimes_file = baseDir + inputList snap_times = np.array(ascii.read(snaptimes_file, format = 'no_header')['col1']) if conv is None: return snap_times elif conv == 'zred': return 1/snap_times - 1 elif conv == 'time': return Planck13.lookback_time(np.inf).value-Planck13.lookback_time(1/snap_times-1).value else: print("I do not know what you mean by '" + conv + "'...") set_trace()