Parallel Computing: Hyperslabs in HDF5

Well, I hope you were able to get HDF5 and OpenMPI running on your machines. As you will notice, the previous example code is not very useful because every process can read and write to any part of the file. This means that you, as the programmer, have to manually control which part of the file is accessed by which process.

Have no fear. The folks at NCSA introduced the concept of a hyperslab.

Hyperslabs are portions of datasets. A hyperslab selection can be a logically contiguous collection of points in a dataspace, or it can be a regular pattern of points or blocks in a dataspace.

Here is the previous code converted into a hyperslab example.

#include <mpi.h>
#include <hdf5.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main (int argc, char *argv[])
{
        hid_t fd, plist_id, dspace, mspace, dset;
        int i, rank, size;

        /* create 2D hyperslab */
        hsize_t count[2];
        hsize_t offset[2];
        hsize_t dims[2] = {8, 5};

        int *data;

        /* initialize mpi */
        MPI_Init (&argc, &argv);
        MPI_Comm_size (MPI_COMM_WORLD, &size);
        MPI_Comm_rank (MPI_COMM_WORLD, &rank);          

        /* define size of hyperslab */
        count[0] = dims[0]/size;
        count[1] = dims[1];
        offset[0] = rank * count[0];
        offset[1] = 0;

        /* create sample data on in my memory space */
        data = (int *) malloc(sizeof(int)*count[0]*count[1]);
        for (i=0; i < count[0]*count[1]; i++)
                data[i] = rank + 10;

        /* create properties list to define parallel i/o */
        plist_id = H5Pcreate (H5P_FILE_ACCESS);
        H5Pset_fapl_mpio (plist_id, MPI_COMM_WORLD, MPI_INFO_NULL);

        /* create file */
        fd = H5Fcreate ("pdata.h5", H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
        if (fd < 0) {
                printf ("parallel %d: error on create.", rank);
                MPI_Finalize ();
                exit (1);
        }

        /* close properties list */
        H5Pclose (plist_id);

        /* create two dimensional space for the whole thing data space */
        dspace = H5Screate_simple (2, dims, NULL); 

        /* create data set */
        dset = H5Dcreate (fd, "Parallel", H5T_NATIVE_INT, dspace, H5P_DEFAULT);

        /* create two dimensional space in memory for just each processes share */
        mspace = H5Screate_simple(2, count, NULL);

        /* select hyperslab from dspace */
        H5Sselect_hyperslab(dspace, H5S_SELECT_SET, offset, NULL, count, NULL);

        /* set property list to write data collectively */
        /* change COLLECTIVE to INDEPENDENT for independent write */
        plist_id = H5Pcreate (H5P_DATASET_XFER);
        H5Pset_dxpl_mpio (plist_id, H5FD_MPIO_COLLECTIVE);

        /* write data to all */
        if (H5Dwrite(dset, H5T_NATIVE_INT, mspace, dspace, plist_id, data) < 0)
                perror ("parallel");

        /* free data */
        free (data);

        /* close properties list */
        H5Pclose (plist_id);

        /* close data set */
        H5Dclose (dset);

        /* close data space */
        H5Sclose (dspace);

        /* close file */
        H5Fclose (fd);

        /* close mpi */
        MPI_Finalize ();

        return 0;
} /* main */

You can use a Makefile similar to the one in the previous blog post. That should be a good start. Again, run this program with 2, 4, or 8 processes, so that the 8 rows of the dataset divide evenly among them. Happy coding!

Leave a Reply