11"""
22convert.py
3- Written by Tyler Sutterley (12/2022 )
3+ Written by Tyler Sutterley (03/2024 )
44Utilities for converting ICESat-2 HDF5 files into different formats
55
66PYTHON DEPENDENCIES:
1616 pandas: Python Data Analysis Library
1717 https://pandas.pydata.org/
1818
19- PROGRAM DEPENDENCIES:
20- convert_delta_time.py: converts delta times into Julian and year-decimal
21- time.py: Utilities for calculating time operations
22-
2319UPDATE HISTORY:
20+ Updated 03/2024: use pathlib to define and operate on paths
2421 Updated 12/2022: place some imports behind try/except statements
2522 Updated 06/2022: place zarr and pandas imports behind try/except statements
2623 Updated 04/2022: updated docstrings to numpy documentation format
3229 Updated 08/2020: added output in pandas dataframe for ATL06 and ATL08
3330 Written 06/2020
3431"""
35- import os
3632import re
33+ import pathlib
3734import warnings
3835import itertools
3936import posixpath
4037import numpy as np
41- from icesat2_toolkit .convert_delta_time import convert_delta_time
4238
4339# attempt imports
4440try :
@@ -115,16 +111,14 @@ def HDF5_to_zarr(self, **kwds):
115111 **kwds: dict
116112 keyword arguments for output zarr converter
117113 """
118- # split extension from HDF5 file
119- if isinstance (self .filename , str ):
120- fileBasename ,fileExtension = os .path .splitext (self .filename )
121- else :
122- fileBasename ,fileExtension = os .path .splitext (self .filename .filename )
123114 # output zarr file
124- zarr_file = os .path .expanduser (f'{ fileBasename } .zarr' )
115+ if isinstance (self .filename , (str , pathlib .Path )):
116+ zarr_file = pathlib .Path (self .filename ).with_suffix ('.zarr' )
117+ else :
118+ zarr_file = pathlib .Path (self .filename .filename ).with_suffix ('.zarr' )
125119 # copy everything from the HDF5 file to the zarr file
126120 with h5py .File (self .filename , mode = 'r' ) as source :
127- dest = zarr .open_group (zarr_file ,mode = 'w' )
121+ dest = zarr .open_group (zarr_file , mode = 'w' )
128122 # value checks on output zarr
129123 if not hasattr (dest , 'create_dataset' ):
130124 raise ValueError ('dest must be a group, got {!r}' .format (dest ))
@@ -142,16 +136,14 @@ def HDF5_to_HDF5(self, **kwds):
142136 **kwds: dict
143137 keyword arguments for output HDF5 converter
144138 """
145- # split extension from HDF5 file
146- if isinstance (self .filename , str ):
147- fileBasename ,fileExtension = os .path .splitext (self .filename )
148- else :
149- fileBasename ,fileExtension = os .path .splitext (self .filename .filename )
150139 # output HDF5 file
151- hdf5_file = os .path .expanduser (f'{ fileBasename } .h5' )
140+ if isinstance (self .filename , (str , pathlib .Path )):
141+ hdf5_file = pathlib .Path (self .filename ).with_suffix ('.h5' )
142+ else :
143+ hdf5_file = pathlib .Path (self .filename .filename ).with_suffix ('.h5' )
152144 # copy everything from the HDF5 file
153145 with h5py .File (self .filename ,mode = 'r' ) as source :
154- dest = h5py .File (hdf5_file ,mode = 'w' )
146+ dest = h5py .File (hdf5_file , mode = 'w' )
155147 # value checks on output HDF5
156148 if not hasattr (dest , 'create_dataset' ):
157149 raise ValueError ('dest must be a group, got {!r}' .format (dest ))
@@ -270,19 +262,17 @@ def HDF5_to_ascii(self, **kwds):
270262 r'(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?).h5$' )
271263 # build path to the input HDF5 file (str, Path or object with a filename attribute)
272264 # extract parameters from ICESat2 HDF5 file
273- if isinstance (self .filename , str ):
274- fileBasename ,fileExtension = os .path .splitext (self .filename )
275- # extract parameters from ICESat2 HDF5 file
276- SUB ,PRD ,HEM ,YY ,MM ,DD ,HH ,MN ,SS ,TRK ,CYCL ,GRAN ,RL ,VERS ,AUX = \
277- rx .findall (os .path .basename (self .filename )).pop ()
265+ if isinstance (self .filename , (str , pathlib .Path )):
266+ hdf5_file = pathlib .Path (self .filename )
278267 else :
279- fileBasename ,fileExtension = os .path .splitext (self .filename .filename )
280- SUB ,PRD ,HEM ,YY ,MM ,DD ,HH ,MN ,SS ,TRK ,CYCL ,GRAN ,RL ,VERS ,AUX = \
281- rx .findall (os .path .basename (self .filename .filename )).pop ()
268+ hdf5_file = pathlib .Path (self .filename .filename )
269+ # extract parameters from ICESat2 HDF5 file
270+ SUB ,PRD ,HEM ,YY ,MM ,DD ,HH ,MN ,SS ,TRK ,CYCL ,GRAN ,RL ,VERS ,AUX = \
271+ rx .findall (hdf5_file .name ).pop ()
282272 # output file suffix for csv or tab-delimited text
283273 delimiter = ',' if self .reformat == 'csv' else '\t '
284274 # copy bare minimum variables from the HDF5 file to the ascii file
285- source = h5py .File (self .filename ,mode = 'r' )
275+ source = h5py .File (self .filename , mode = 'r' )
286276
287277 # find valid beam groups by testing for particular variables
288278 if (PRD == 'ATL06' ):
@@ -400,8 +390,9 @@ def HDF5_to_ascii(self, **kwds):
400390 output = np .column_stack ([values [v ][valid ] for v in vnames ])
401391
402392 # output ascii file
403- ascii_file = f'{ fileBasename } _{ gtx } .{ self .reformat } '
404- fid = open (os .path .expanduser (ascii_file ), mode = 'w' , encoding = 'utf8' )
393+ granule = f'{ hdf5_file .stem } _{ gtx } .{ self .reformat } '
394+ ascii_file = hdf5_file .parent .joinpath (granule )
395+ fid = ascii_file .open (mode = 'w' , encoding = 'utf8' )
405396 # print YAML header to top of file
406397 fid .write ('{0}:\n ' .format ('header' ))
407398 # global attributes for file
@@ -460,13 +451,13 @@ def HDF5_to_dataframe(self, **kwds):
460451 r'(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?).h5$' )
461452 # build path to the input HDF5 file (str, Path or object with a filename attribute)
462453 # extract parameters from ICESat2 HDF5 file
463- if isinstance (self .filename , str ):
464- # extract parameters from ICESat2 HDF5 file
465- SUB ,PRD ,HEM ,YY ,MM ,DD ,HH ,MN ,SS ,TRK ,CYCL ,GRAN ,RL ,VERS ,AUX = \
466- rx .findall (os .path .basename (self .filename )).pop ()
454+ if isinstance (self .filename , (str , pathlib .Path )):
455+ hdf5_file = pathlib .Path (self .filename )
467456 else :
468- SUB ,PRD ,HEM ,YY ,MM ,DD ,HH ,MN ,SS ,TRK ,CYCL ,GRAN ,RL ,VERS ,AUX = \
469- rx .findall (os .path .basename (self .filename .filename )).pop ()
457+ hdf5_file = pathlib .Path (self .filename .filename )
458+ # extract parameters from ICESat2 HDF5 file
459+ SUB ,PRD ,HEM ,YY ,MM ,DD ,HH ,MN ,SS ,TRK ,CYCL ,GRAN ,RL ,VERS ,AUX = \
460+ rx .findall (hdf5_file .name ).pop ()
470461
471462 # copy bare minimum variables from the HDF5 file to pandas data frame
472463 source = h5py .File (self .filename ,mode = 'r' )
0 commit comments