Reading an HDF file with C, FORTRAN, Python, IDL, MATLAB and R
ICARE HDF reader
ICARE has developed a package of libraries written in Fortran77, Fortran90, C, Python, IDL, MATLAB and R They mainly contain functions for :- obtaining informations about the structure of an HDF file
- extracting SDS data
- reading SDS and file attributes
- calibrating data
Download
Source can be downloaded here :icare_hdf_reader.v1.0.1.tar.gz
icare_hdf5_reader.tar.gz
Package can be browsed here :
icare_hdf_reader
icare_hdf5_reader
Usage
See the the README files of the desired language for a detailed description of the way to use the library Examples In all languages directories, some real examples of usage of the library are available in the ./example directory. This is the place to begin for a quick startLanguage
HDF4
PYTHON
# -*- coding: utf-8 -*- #!/usr/local/bin/python from pyhdf import SD # HDF file and SDS names FILE_NAME = "MYD04_L2.A2013060.1300.051.2013062021359.hdf" SDS_NAME = "Optical_Depth_Land_And_Ocean" # open the hdf file hdf = SD.SD(FILE_NAME) # select and read the sds data sds = hdf.select(SDS_NAME) data = sds.get() # get dataset dimensions nrows, ncols = data.shape # data.shape: (3712, 3712) in the SEVIRI AER-OC example, 203 print data.shape # data.shape: (3712, 3712) in the SEVIRI AER-OC example, (203, 135) i=200 # row index j=125 # col index print data[200,125] # Terminate access to the data set sds.endaccess() # Terminate access to the SD interface and close the file hdf.end()
C
#include "mfhdf.h" const char file_name[100]="MYD04_L2.A2013060.1300.051.2013062021359.hdf"; char sds_name[100]="Optical_Depth_Land_And_Ocean"; main( ) { /************************* Variable declaration **************************/ int32 sd_id, sds_id, sds_index, rank, data_type, n_attrs; int32 dim_sizes[2]; int32 start[2]; int32 stride[2]; int32 edges[2]; int32 nrows, ncols; void *data; int i, j, k; /********************* End of variable declaration ***********************/ /* Open the file */ sd_id = SDstart (file_name, DFACC_READ); /* Get the index of the given data set SDS_NAME */ sds_index = SDnametoindex(sd_id, sds_name); sds_id = SDselect (sd_id, sds_index); /* Get the name, rank, dimension sizes, data type and number of attributes for a data set */ SDgetinfo(sds_id, sds_name, &rank, dim_sizes, &data_type, &n_attrs); nrows = dim_sizes[0]; ncols = dim_sizes[1]; start[0] = 0; /* index of first row to read */ start[1] = 0; /* index of first column to read */ edges[0] = dim_sizes[0]; /* the number of rows to read */ edges[1] = dim_sizes[1]; /* the number of cols to read */ stride[0] = 1; stride[1] = 1; data = (void *)malloc(nrows*ncols*DFKNTsize(data_type)); /* Read entire data into data array The array stride (i.e. step) specifies the reading pattern along each dimension. For example, if one of the elements of the array stride is 1, then every element along the corresponding dimension of the data set will be read. If one of the elements of the array stride is 2, then every other element along the corresponding dimension of the data set will be read, and so on. Specifying stride value of NULL in the C interface or setting all values of the array stride to 1 in either interface (C or FORTRAN) specifies the contiguous reading of data. */ SDreaddata (sds_id, start, stride, edges, (VOIDP)data); i=200; // row index j=125; // col index k = i*ncols + j; printf ("%d\n ", ((int16*) data)[k]); /* Terminate access to the data set. */ SDendaccess (sds_id); /* Terminate access to the SD interface and close the file. */ SDend (sd_id); free(data); };
FORTRAN
program read_data implicit none C Parameter declaration integer DFACC_READ, DFNT_INT32 parameter(DFACC_READ = 1, DFNT_INT32 = 24) integer MAXDIM parameter(MAXDIM=100000) C Function declaration integer sfstart, sfn2index, sfselect, sfginfo, sfrdata integer sfendacc, sfend C ************************** Variable declaration C ************************** character*100 sds_name character*100 file_name integer sd_id, sds_id, sds_index, status integer rank, data_type, n_attrs integer dim_sizes(32), start(32), edges(32), stride(32) integer*2 data(MAXDIM) integer nrows, ncols, i, j, k C ************************** End of variable declaration C ************************** file_name="MYD04_L2.A2013060.1300.051.2013062021359.hdf" sds_name="Optical_Depth_Land_And_Ocean" C Open the file sd_id = sfstart(file_name, DFACC_READ) C Get the index of the given data set SDS_NAME sds_index = sfn2index(sd_id, sds_name) sds_id = sfselect(sd_id, sds_index) C Get the name, rank, dimension sizes, data type and number of C attributes for a data set print*, sds_id status = sfginfo(sds_id,sds_name,rank,dim_sizes,data_type,n_attrs) nrows = dim_sizes(2) ncols = dim_sizes(1) C Define the location, pattern, and size of the data set start(1) = 0 ! index of first row to read start(2) = 0 ! index of first column to read edges(1) = dim_sizes(1) ! the number of cols to read edges(2) = dim_sizes(2) ! the number of rows to read stride(1) = 1 ! to read entire data stride(2) = 1 C Read entire data into data array. The array stride (i.e. step) specifies C the reading pattern along each dimension. C The sfrdata routine reads numeric scientific data and sfrcdata reads C character scientific data status = sfrdata(sds_id, start, stride, edges, data) i=201 ! row index j=126 ! col index k = (i-1)*ncols + j print*, data(k) C Terminate access to the data set status = sfendacc(sds_id) C Terminate access to the SD interface and close the file status = sfend(sd_id) end
IDL
PRO read_hdf FILE_NAME = "MYD04_L2.A2013060.1300.051.2013062021359.hdf" ; nrows=203, ncols=135 SDS_NAME = "Optical_Depth_Land_And_Ocean" ; Open the file sd_id = HDF_SD_START(FILE_NAME, /read) ; Find the index of the sds to read using its name sds_index = HDF_SD_NAMETOINDEX(sd_id, SDS_NAME) ; Select it sds_id = HDF_SD_SELECT(sd_id, sds_index) ; Get data set information including dimension information HDF_SD_GetInfo, sds_id, name = SDS_NAME, natts = num_attributes, ndim = num_dims, dims = dim_sizes nrows = dim_sizes[1] ncols = dim_sizes[0] ; Define subset to read. start is [0,0]. start = INTARR(2) ; the start position of the data to be read start[0] = 0 start[1] = 0 edges = INTARR(2) ; the number of elements to read in each dimension edges[0] = dim_sizes[0] edges[1] = dim_sizes[1] ; Read the data : you can notice that here, it is not needed to allocate the data array yourself HDF_SD_GETDATA, sds_id, data, start = start, count = edges i=200 ; row index j=125 ; col index PRINT, FORMAT = '(I," ",$)', data[j,i] ; 65 PRINT, "" ; end access to SDS HDF_SD_ENDACCESS, sds_id ; close the hdf file HDF_SD_END, sd_id END
MATLAB
file_name = 'MYD04_L2.A2013060.1300.051.2013062021359.hdf'; sds_name = 'Optical_Depth_Land_And_Ocean' ; % Open a buffer for hdf file sd_id = hdfsd( 'start', file_name, 'rdonly') % Get the number of datasets and global attributes [num_datasets, num_global_attr, status] = hdfsd('fileinfo',sd_id) % Get the sds identifier from the dataset named sds_name sds_index = hdfsd('nametoindex', sd_id, sds_name) sds_id = hdfsd('select', sd_id, sds_index) % Get the name, number of dimensions, size of each dimension, data type and number of %attributes of a dataset [sds_name, sds_num_dim, sds_dim, sds_data_type, sds_num_attr] = hdfsd('getinfo', sds_id) % Read the dataset identified by sds_id sds_start_vector = zeros(1, sds_num_dim) %the position to begin reading sds_stride = [] %the interval between value to read sds_end_vector = sds_dim %the end of dimension to read [sds_data, status] = hdfsd('readdata', sds_id, sds_start, sds_stride, sds_edges) ; % Read the dataset named sds_name data = hdfread (file_name, sds_name) ; i = 200 ; %row index j = 125 ; %col index disp([data(i, j)]) ; % Or, disp([sds_data(j, i)]) ; % Close access to the dataset status = hdfsd('endaccess',sds_id); % Close access to the hdf file status = hdfsd('end',sd_id);
HDF5
Fortran
program sds_info use hdf5 implicit none ! Variables declaration CHARACTER*100 :: file_name CHARACTER*100 :: sds_name CHARACTER*100 :: gr_name CHARACTER*100 :: attr_name INTEGER(HID_T):: file_id, gr_id, dset_id, attr_id INTEGER :: status, error, storage, nlinks,max_corder, attr_num REAL, DIMENSION(10,10) :: dset_data, data_out INTEGER, DIMENSION(1:10) :: buf INTEGER(HSIZE_T), DIMENSION(2):: data_dims INTEGER(HSIZE_T), DIMENSION(1) ::dims ! Variables initalization file_name = "example.h5" sds_name = "/g1/g1.1/dset1.1.1" gr_name = "g3" attr_name = "attr1" ! Initialize the interface call h5open_f(status) ! Open an hdf5 file call h5fopen_f (file_name, H5F_ACC_RDWR_F, file_id, status) ! Get the number of global attributes call h5aget_num_attrs_f(file_id, attr_num, error) print *, "attr_num ",attr_num ! Open a group call h5gopen_f(file_id, gr_name, gr_id, status ) ! Get information about a group ! **storage: type of storage for links in the group: Compact storage, Indexed storage or Symbol tables ! **nlinks : number of links in the group ! **max_corder : current maximum creation order value for group call h5gget_info_f(gr_id, storage, nlinks,max_corder, error) print*,"storage, nlinks,max_corder", storage, nlinks,max_corder ! Open a dataset call h5dopen_f(file_id, sds_name, dset_id, error) ! Get the number of attributes call h5aget_num_attrs_f(dset_id, attr_num, error) print *, "attr_num ",attr_num ! Read the dataset call h5dread_f(dset_id, H5T_NATIVE_REAL, data_out, data_dims, error) print *, "data_out ",data_out(2,2) ! Open an attribute call h5aopen_f(file_id, attr_name, attr_id, error) ! Read an attribute call h5aread_f(attr_id, H5T_NATIVE_INTEGER, buf, dims, error) print *, "buf ", buf ! Terminate access to the group call h5gclose_f(gr_id, error) ! Terminate access to the dataset call h5dclose_f(dset_id, error) ! Terminate access to the file call h5fclose_f(file_id, error) ! Close FORTRAN interface. call h5close_f(status) end program sds_info
MATLAB
file_name = '/matlab/R2012a/toolbox/matlab/demos/example.h5'; sds_name = '/g4/lon' ; % Open hdf file in read-only mode sd_id = H5F.open(file_name); % Get file info by file id file_info = H5F.get_info(sd_id) % Get global info of the file info = h5info(file_name) % Display the structure of the file h5disp(file_name); % Open a group g_id = H5G.open(sd_id,'/g2'); % Get group info group_info = H5F.get_info(g_id) % Open sds by name sds_id = H5D.open(g_id,sds_name); % Get global info of a sds sds_info = h5info(file_name,'/g4/time') % Display the structure of a sds h5disp(file_name, sds_name); % Get attributes info of a sds attval = h5readatt(file_name, '/g4/lon','units') % Read all data of a sds data = H5D.read(sds_id) % Read the first 5-by-3 subset sds_data= h5read(file_name, '/g4/world', [1 1], [5 3]) ; disp(sds_data); %Close sds access H5D.close(sds_id); %Close group access H5G.close(g_id); % Close access to the hdf file H5F.close(sd_id);
R
#To install the package rhdf5 , you need a current version (>2.15.0) of R #After installing R you can run the following commands from the R command shell to install the bioconductor package rhdf5. #source("http://bioconductor.org/biocLite.R") #biocLite("rhdf5") library(rhdf5) filename = "/home/Projets/R/example.h5" # Read data of all datasets of a group h5read(filename,"g2") # Read a dataset h5read(filename,"g2/dset2.2") # Display the contents of main groups h5ls(filename, FALSE) # Open a hdf5 file fid <-H5Fopen(filename) # Get the number of global attributes H5Oget_num_attrs(fid) # Open the object "g2" gid <- H5Oopen(fid, "g2") # Get group info H5Gget_info(gid) #or H5Gget_info_by_name(fid, "g2") # Get the number of group attributes H5Oget_num_attrs(gid) # or H5Oget_num_attrs("g2") # Open a dataset did <- H5Dopen(fid, "g2/dset2.2") # Get dataspace dataspace <- H5Dget_space(did) dataspace # Close dataspace access H5Sclose(dataspace) #Close dataset access H5Dclose(did) # Close group access H5Oclose(gid) # Close hdf5 file access H5Fclose(fid)