001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the files COPYING and Copyright.html. *
009 * COPYING can be found at the root of the source code distribution tree.    *
010 * Or, see http://hdfgroup.org/products/hdf-java/doc/Copyright.html.         *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.util.Vector;
019
020/**
021 * The abstract class provides general APIs to create and manipulate dataset
022 * objects, and retrieve dataset properties datatype and dimension sizes.
023 * <p>
024 * This class provides two convenient functions, read()/write(), to read/write
025 * data values. Reading/writing data may take many library calls if we use the
026 * library APIs directly. The read() and write functions hide all the details of
027 * these calls from users.
028 *
029 * @see hdf.object.ScalarDS
030 * @see hdf.object.CompoundDS
031 *
032 * @version 1.1 9/4/2007
033 * @author Peter X. Cao
034 */
035public abstract class Dataset extends HObject {
    /**
     * Serialization version identifier of this class.
     */
    private static final long serialVersionUID    = -3360885430038261178L;

    /** The SLF4J logger used by this class for trace output. */
    private final static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class);

    /**
     * The memory buffer that holds the raw data of the dataset.
     */
    protected Object          data;

    /**
     * The number of dimensions of the dataset.
     */
    protected int             rank;

    /**
     * The current dimension sizes of the dataset.
     */
    protected long[]          dims;

    /**
     * The max dimension sizes of the dataset.
     */
    protected long[]          maxDims;

    /**
     * Array that contains the number of data points selected (for read/write)
     * in each dimension.
     * <p>
     * The select size must be less than or equal to the current dimension size.
     * A subset of a rectangle selection is defined by the starting position and
     * selected sizes.
     * <p>
     * For example, a 4 X 5 dataset
     *
     * <pre>
     *     0,  1,  2,  3,  4
     *    10, 11, 12, 13, 14
     *    20, 21, 22, 23, 24
     *    30, 31, 32, 33, 34
     * long[] dims = {4, 5};
     * long[] startDims = {1, 2};
     * long[] selectedDims = {3, 3};
     * then the following subset is selected by the startDims and selectedDims above
     *     12, 13, 14
     *     22, 23, 24
     *     32, 33, 34
     * </pre>
     */
    protected long[]          selectedDims;

    /**
     * The starting position of each dimension of a selected subset. With both
     * the starting position and selected sizes, the subset of a rectangle
     * selection is fully defined.
     */
    protected long[]          startDims;

    /**
     * Array that contains the indices of the dimensions selected for display.
     * <p>
     * <B>selectedIndex[] is provided for two purposes:</B>
     * <OL>
     * <LI>
     * selectedIndex[] is used to indicate the order of dimensions for display,
     * i.e. selectedIndex[0] = row, selectedIndex[1] = column and
     * selectedIndex[2] = depth. For example, for a four dimension dataset, if
     * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index,
     * dim[2] is selected as column index and dim[3] is selected as depth index.
     * <LI>
     * selectedIndex[] is also used to select dimensions for display for
     * datasets with three or more dimensions. We assume that applications such
     * as HDFView can only display data up to three dimensions (a 2D
     * spreadsheet/image with a third dimension that the 2D spreadsheet/image is
     * cut from). For a dataset with more than three dimensions, we need
     * selectedIndex[] to store which three dimensions are chosen for display.
     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
     * then dim[1] is selected as row index, dim[2] is selected as column index
     * and dim[3] is selected as depth index. dim[0] is not selected. Its
     * location is fixed at 0 by default.
     * </OL>
     */
    protected final int[]     selectedIndex;

    /**
     * The number of elements to move from the start location in each dimension.
     * For example, if selectedStride[0] = 2, every other data point is selected
     * along dim[0].
     */
    protected long[]          selectedStride;

    /**
     * The array of dimension sizes for a chunk.
     */
    protected long[]          chunkSize;

    /** The compression information. */
    protected String          compression;

    /**
     * Text prefix for a GZIP compression description. Presumably the
     * compression level is appended to it; the usage is not visible in this
     * part of the file.
     */
    public final static String          compression_gzip_txt = "GZIP: level = ";

    /** The filters information. */
    protected String          filters;

    /** The storage information. */
    protected String          storage;

    /** The datatype object of the dataset. */
    protected Datatype        datatype;

    /**
     * Array of strings that represent the dimension names. It is null if
     * dimension names do not exist.
     */
    protected String[]        dimNames;

    /** Flag to indicate if the byte[] array is converted to strings. Defaults to true. */
    protected boolean         convertByteToString = true;

    /** Flag to indicate if data values are loaded into memory. */
    protected boolean         isDataLoaded        = false;

    /** The number of data points in the memory buffer. */
    protected long            nPoints             = 1;

    /**
     * The data buffer that contains the raw data directly read from file
     * (before any data conversion).
     */
    protected Object          originalBuf         = null;

    /**
     * The array that holds the converted data of unsigned C-type integers.
     * <p>
     * For example, suppose that the original data is an array of unsigned
     * 16-bit short integers. Since Java does not support unsigned integers, the
     * data is converted to an array of 32-bit signed integers. In that case, the
     * converted buffer is the array of 32-bit signed integers.
     */
    protected Object          convertedBuf        = null;

    /**
     * Flag to indicate if the enum data is converted to strings.
     */
    protected boolean         enumConverted       = false;
182
    /**
     * Constructs a Dataset object with a given file, name and path.
     * <p>
     * Delegates to {@link #Dataset(FileFormat, String, String, long[])} with a
     * null oid.
     *
     * @param theFile
     *            the file that contains the dataset.
     * @param name
     *            the name of the Dataset, e.g. "dset1".
     * @param path
     *            the full group path of this Dataset, e.g. "/arrays/".
     */
    public Dataset(FileFormat theFile, String name, String path) {
        this(theFile, name, path, null);
    }
196
197    /**
198     * @deprecated Not for public use in the future. <br>
199     *             Using {@link #Dataset(FileFormat, String, String)}
200     *
201     * @param theFile
202     *            the file that contains the dataset.
203     * @param name
204     *            the name of the Dataset, e.g. "dset1".
205     * @param path
206     *            the full group path of this Dataset, e.g. "/arrays/".
207     * @param oid
208     *            the oid of this Dataset.
209     */
210    @Deprecated
211    public Dataset(FileFormat theFile, String name, String path, long[] oid) {
212        super(theFile, name, path, oid);
213
214        rank = 0;
215        data = null;
216        dims = null;
217        maxDims = null;
218        selectedDims = null;
219        startDims = null;
220        selectedStride = null;
221        chunkSize = null;
222        compression = "NONE";
223        filters = "NONE";
224        storage = "NONE";
225        dimNames = null;
226
227        selectedIndex = new int[3];
228        selectedIndex[0] = 0;
229        selectedIndex[1] = 1;
230        selectedIndex[2] = 2;
231    }
232
233    /**
234     * Clears memory held by the dataset, such as data buffer.
235     */
236    public void clear() {
237        if (data != null) {
238            if (data instanceof Vector) {
239                ((Vector) data).setSize(0);
240            }
241            data = null;
242            originalBuf = null;
243            convertedBuf = null;
244        }
245        isDataLoaded = false;
246    }
247
    /**
     * Retrieves datatype and dataspace information from file and sets the
     * dataset in memory.
     * <p>
     * The init() is designed to support lazy operation in a dataset object.
     * When a data object is retrieved from file, the datatype, dataspace and
     * raw data are not loaded into memory. When it is asked to read the raw
     * data from file, init() is first called to get the datatype and dataspace
     * information, then load the raw data from file.
     * <p>
     * init() is also used to reset the selection of a dataset (start, stride
     * and count) to the default, which is the entire dataset for 1D or 2D
     * datasets. In the following example, init() at step 1) retrieves datatype
     * and dataspace information from file. getData() at step 3) reads only one
     * data point. init() at step 4) resets the selection to the whole dataset.
     * getData() at step 6) reads the values of the whole dataset into memory.
     *
     * <pre>
     * dset = (Dataset) file.get(NAME_DATASET);
     *
     * // 1) get datatype and dataspace information from file
     * dset.init();
     * rank = dset.getRank(); // rank = 2, a 2D dataset
     * count = dset.getSelectedDims();
     * start = dset.getStartDims();
     * dims = dset.getDims();
     *
     * // 2) select only one data point
     * for (int i = 0; i &lt; rank; i++) {
     *     start[i] = 0;
     *     count[i] = 1;
     * }
     *
     * // 3) read one data point
     * data = dset.getData();
     *
     * // 4) reset to select the whole dataset
     * dset.init();
     *
     * // 5) clean the memory data buffer
     * dset.clearData();
     *
     * // 6) Read the whole dataset
     * data = dset.getData();
     * </pre>
     */
    public abstract void init();
295
296    /**
297     * Returns the rank (number of dimensions) of the dataset.
298     *
299     * @return the number of dimensions of the dataset.
300     */
301    public final int getRank() {
302        if (rank < 0) init();
303
304        return rank;
305    }
306
307    /**
308     * Returns the array that contains the dimension sizes of the dataset.
309     *
310     * @return the dimension sizes of the dataset.
311     */
312    public final long[] getDims() {
313        if (rank < 0) init();
314
315        return dims;
316    }
317
318    /**
319     * Returns the array that contains the max dimension sizes of the dataset.
320     *
321     * @return the max dimension sizes of the dataset.
322     */
323    public final long[] getMaxDims() {
324        if (rank < 0) init();
325
326        if (maxDims == null) return dims;
327
328        return maxDims;
329    }
330
331    /**
332     * Returns the dimension sizes of the selected subset.
333     * <p>
334     * The SelectedDims is the number of data points of the selected subset.
335     * Applications can use this array to change the size of selected subset.
336     *
337     * The select size must be less than or equal to the current dimension size.
338     * Combined with the starting position, selected sizes and stride, the
339     * subset of a rectangle selection is fully defined.
340     * <p>
341     * For example, a 4 X 5 dataset
342     *
343     * <pre>
344     *     0,  1,  2,  3,  4
345     *    10, 11, 12, 13, 14
346     *    20, 21, 22, 23, 24
347     *    30, 31, 32, 33, 34
348     * long[] dims = {4, 5};
349     * long[] startDims = {1, 2};
350     * long[] selectedDims = {3, 3};
351     * long[] selectedStride = {1, 1};
352     * then the following subset is selected by the startDims and selectedDims
353     *     12, 13, 14
354     *     22, 23, 24
355     *     32, 33, 34
356     * </pre>
357     *
358     * @return the dimension sizes of the selected subset.
359     */
360    public final long[] getSelectedDims() {
361        if (rank < 0) init();
362
363        return selectedDims;
364    }
365
366    /**
367     * Returns the starting position of a selected subset.
368     * <p>
369     * Applications can use this array to change the starting position of a
370     * selection. Combined with the selected dimensions, selected sizes and
371     * stride, the subset of a rectangle selection is fully defined.
372     * <p>
373     * For example, a 4 X 5 dataset
374     *
375     * <pre>
376     *     0,  1,  2,  3,  4
377     *    10, 11, 12, 13, 14
378     *    20, 21, 22, 23, 24
379     *    30, 31, 32, 33, 34
380     * long[] dims = {4, 5};
381     * long[] startDims = {1, 2};
382     * long[] selectedDims = {3, 3};
383     * long[] selectedStride = {1, 1};
384     * then the following subset is selected by the startDims and selectedDims
385     *     12, 13, 14
386     *     22, 23, 24
387     *     32, 33, 34
388     * </pre>
389     *
390     * @return the starting position of a selected subset.
391     */
392    public final long[] getStartDims() {
393        if (rank < 0) init();
394
395        return startDims;
396    }
397
398    /**
399     * Returns the selectedStride of the selected dataset.
400     * <p>
401     * Applications can use this array to change how many elements to move in
402     * each dimension.
403     *
404     * Combined with the starting position and selected sizes, the subset of a
405     * rectangle selection is defined.
406     * <p>
407     * For example, a 4 X 5 dataset
408     *
409     * <pre>
410     *     0,  1,  2,  3,  4
411     *    10, 11, 12, 13, 14
412     *    20, 21, 22, 23, 24
413     *    30, 31, 32, 33, 34
414     * long[] dims = {4, 5};
415     * long[] startDims = {0, 0};
416     * long[] selectedDims = {2, 2};
417     * long[] selectedStride = {2, 3};
418     * then the following subset is selected by the startDims and selectedDims
419     *     0,   3
420     *     20, 23
421     * </pre>
422     *
423     * @return the selectedStride of the selected dataset.
424     */
425    public final long[] getStride() {
426        if (rank < 0) init();
427
428        if (rank <= 0) {
429            return null;
430        }
431
432        if (selectedStride == null) {
433            selectedStride = new long[rank];
434            for (int i = 0; i < rank; i++) {
435                selectedStride[i] = 1;
436            }
437        }
438
439        return selectedStride;
440    }
441
442    /**
443     * Sets the flag that indicates if a byte array is converted to a string
444     * array.
445     * <p>
446     * In a string dataset, the raw data from file is stored in a byte array. By
447     * default, this byte array is converted to an array of strings. For a large
448     * dataset (e.g. more than one million strings), the converson takes a long
449     * time and requires a lot of memory space to store the strings. At some
450     * applications, such a conversion can be delayed. For example, A GUI
451     * application may convert only part of the strings that are visible to the
452     * users, not the entire data array.
453     * <p>
454     * setConvertByteToString(boolean b) allows users to set the flag so that
455     * applications can choose to perform the byte-to-string conversion or not.
456     * If the flag is set to false, the getData() returns a array of byte
457     * instead of an array of strings.
458     *
459     * @param b
460     *            convert bytes to strings if b is true; otherwise, if false, do
461     *            not convert bytes to strings.
462     */
463    public final void setConvertByteToString(boolean b) {
464        convertByteToString = b;
465    }
466
467    /**
468     * Returns the flag that indicates if a byte array is converted to a string
469     * array..
470     *
471     * @return true if byte array is converted to string; otherwise, returns
472     *         false if there is no conversion.
473     */
474    public final boolean getConvertByteToString() {
475        return convertByteToString;
476    }
477
    /**
     * Reads the data from file.
     * <p>
     * read() reads the data from file to a memory buffer and returns the memory
     * buffer. The dataset object does not hold the memory buffer. To store the
     * memory buffer in the dataset object, one must call getData().
     * <p>
     * By default, the whole dataset is read into memory. Users can also select
     * a subset to read. Subsetting is done in an implicit way.
     * <p>
     * <b>How to Select a Subset</b>
     * <p>
     * A selection is specified by three arrays: start, stride and count.
     * <ol>
     * <li>start: offset of a selection
     * <li>stride: determining how many elements to move in each dimension
     * <li>count: number of elements to select in each dimension
     * </ol>
     * getStartDims(), getStride() and getSelectedDims() return the start,
     * stride and count arrays respectively. Applications can make a selection
     * by changing the values of the arrays.
     * <p>
     * The following example shows how to make a subset. In the example, the
     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
     * We want to select every other data point in dims[1] and dims[2]
     *
     * <pre>
     * int rank = dataset.getRank(); // number of dimensions of the dataset
     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
     * long[] start = dataset.getStartDims(); // the offset of the selection
     * long[] stride = dataset.getStride(); // the stride of the dataset
     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for
     *                                                   // display
     *
     * // select dim1 and dim2 as 2D data for display, and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
     *
     * // reset the selection arrays
     * for (int i = 0; i &lt; rank; i++) {
     *     start[i] = 0;
     *     selected[i] = 1;
     *     stride[i] = 1;
     * }
     *
     * // set stride to 2 on dim1 and dim2 so that every other data point is
     * // selected.
     * stride[1] = 2;
     * stride[2] = 2;
     *
     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
     *
     * // when dataset.getData() is called, the selection above will be used since
     * // the dimension arrays are passed by reference. Changes of these arrays
     * // outside the dataset object directly change the values of these arrays
     * // in the dataset object.
     * </pre>
     * <p>
     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
     * short, int, float, double or String type based on the datatype of the
     * dataset.
     * <p>
     * For CompoundDS, the memory data object is a java.util.List object. Each
     * element of the list is a data array that corresponds to a compound field.
     * <p>
     * For example, if compound dataset "comp" has the following nested
     * structure, and member datatypes
     *
     * <pre>
     * comp --&gt; m01 (int)
     * comp --&gt; m02 (float)
     * comp --&gt; nest1 --&gt; m11 (char)
     * comp --&gt; nest1 --&gt; m12 (String)
     * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
     * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
     * </pre>
     *
     * getData() returns a list of six arrays: {int[], float[], char[],
     * String[], long[] and double[]}.
     *
     * @return the data read from file.
     *
     * @see #getData()
     *
     * @throws Exception if object can not be read
     * @throws OutOfMemoryError if memory is exhausted
     */
    public abstract Object read() throws Exception, OutOfMemoryError;
571
    /**
     * Reads the raw data of the dataset from file to a byte array.
     * <p>
     * readBytes() reads raw data to an array of bytes instead of an array of
     * its datatype. For example, for a one-dimensional 32-bit integer dataset
     * of size 5, readBytes() returns a byte array of size 20 instead of an
     * int array of 5.
     * <p>
     * readBytes() can be used to copy data from one dataset to another
     * efficiently: because the raw data is not converted to its native type,
     * it saves memory space and CPU time.
     *
     * @return the byte array of the raw data.
     *
     * @throws Exception if data can not be read
     */
    public abstract byte[] readBytes() throws Exception;
589
    /**
     * Writes a memory buffer to the dataset in file.
     *
     * @param buf
     *            the data to write
     *
     * @throws Exception if data can not be written
     *
     * @see #write()
     */
    public abstract void write(Object buf) throws Exception;
599
600    /**
601     * Writes the memory buffer of this dataset to file.
602     *
603     * @throws Exception if buffer can not be written
604     */
605    public final void write() throws Exception {
606        if (data != null) {
607            write(data);
608        }
609    }
610
    /**
     * Creates a new dataset and writes the data buffer to the new dataset.
     * <p>
     * This function allows applications to create a new dataset for a given
     * data buffer. For example, users can select a specific interesting part
     * from a large image and create a new image with the selection.
     * <p>
     * The new dataset retains the datatype and dataset creation properties of
     * this dataset.
     *
     * @param pgroup
     *            the group which the dataset is copied to.
     * @param name
     *            the name of the new dataset.
     * @param dims
     *            the dimension sizes of the new dataset.
     * @param data
     *            the data values of the subset to be copied.
     *
     * @return the new dataset.
     *
     * @throws Exception if dataset can not be copied
     */
    public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception;
635
    /**
     * Returns the datatype object of the dataset.
     * <p>
     * This method is abstract; each concrete dataset class supplies it.
     *
     * @return the datatype object of the dataset.
     */
    public abstract Datatype getDatatype();
642
643    /**
644     * Returns the data buffer of the dataset in memory.
645     * <p>
646     * If data is already loaded into memory, returns the data; otherwise, calls
647     * read() to read data from file into a memory buffer and returns the memory
648     * buffer.
649     * <p>
650     * By default, the whole dataset is read into memory. Users can also select
651     * subset to read. Subsetting is done in an implicit way.
652     * <p>
653     * <b>How to Select a Subset</b>
654     * <p>
655     * A selection is specified by three arrays: start, stride and count.
656     * <ol>
657     * <li>start: offset of a selection
658     * <li>stride: determining how many elements to move in each dimension
659     * <li>count: number of elements to select in each dimension
660     * </ol>
661     * getStartDims(), getStartDims() and getSelectedDims() returns the start,
662     * stride and count arrays respectively. Applications can make a selection
663     * by changing the values of the arrays.
664     * <p>
665     * The following example shows how to make a subset. In the example, the
666     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
667     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
668     * We want to select every other data point in dims[1] and dims[2]
669     *
670     * <pre>
671     * int rank = dataset.getRank(); // number of dimension of the dataset
672     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
673     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataet
674     * long[] start = dataset.getStartDims(); // the off set of the selection
675     * long[] stride = dataset.getStride(); // the stride of the dataset
676     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for
677     *                                                   // display
678     *
679     * // select dim1 and dim2 as 2D data for display,and slice through dim0
680     * selectedIndex[0] = 1;
681     * selectedIndex[1] = 2;
682     * selectedIndex[1] = 0;
683     *
684     * // reset the selection arrays
685     * for (int i = 0; i &lt; rank; i++) {
686     *     start[i] = 0;
687     *     selected[i] = 1;
688     *     stride[i] = 1;
689     * }
690     *
691     * // set stride to 2 on dim1 and dim2 so that every other data points are
692     * // selected.
693     * stride[1] = 2;
694     * stride[2] = 2;
695     *
696     * // set the selection size of dim1 and dim2
697     * selected[1] = dims[1] / stride[1];
698     * selected[2] = dims[1] / stride[2];
699     *
700     * // when dataset.getData() is called, the slection above will be used since
701     * // the dimension arrays are passed by reference. Changes of these arrays
702     * // outside the dataset object directly change the values of these array
703     * // in the dataset object.
704     * </pre>
705     * <p>
706     * For ScalarDS, the memory data buffer is an one-dimensional array of byte,
707     * short, int, float, double or String type based on the datatype of the
708     * dataset.
709     * <p>
710     * For CompoundDS, the memory data object is an java.util.List object. Each
711     * element of the list is a data array that corresponds to a compound field.
712     * <p>
713     * For example, if compound dataset "comp" has the following nested
714     * structure, and memeber datatypes
715     *
716     * <pre>
717     * comp --&gt; m01 (int)
718     * comp --&gt; m02 (float)
719     * comp --&gt; nest1 --&gt; m11 (char)
720     * comp --&gt; nest1 --&gt; m12 (String)
721     * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
722     * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
723     * </pre>
724     *
725     * getData() returns a list of six arrays: {int[], float[], char[],
726     * String[], long[] and double[]}.
727     *
728     * @return the memory buffer of the dataset.
729     *
730     * @throws Exception if object can not be read
731     * @throws OutOfMemoryError if memory is exhausted
732     */
733    public final Object getData() throws Exception, OutOfMemoryError {
734        if (!isDataLoaded) {
735            log.trace("getData: read");
736            data = read(); // load the data;
737            originalBuf = data;
738            isDataLoaded = true;
739            nPoints = 1;
740            log.trace("getData: selectedDims length={}",selectedDims.length);
741            for (int j = 0; j < selectedDims.length; j++) {
742                nPoints *= selectedDims[j];
743            }
744            log.trace("getData: read {}", nPoints);
745        }
746
747        return data;
748    }
749
750    /**
751     * @deprecated Not for public use in the future.
752     *             <p>
753     *             setData() is not safe to use because it changes memory buffer
754     *             of the dataset object. Dataset operation such as write/read
755     *             will fail if the buffer type or size is changed.
756     *
757     * @param d  the object data
758     */
759    @Deprecated
760    public final void setData(Object d) {
761        data = d;
762    }
763
764    /**
765     * Clears the data buffer in memory and to force the next read() to load
766     * data from file.
767     * <p>
768     * The function read() loads data from file into memory only if the data is
769     * not read. If data is already in memory, read() just returns the memory
770     * buffer. Sometimes we want to force read() to re-read data from file. For
771     * example, when the selection is changed, we need to re-read the data.
772     *
773     * clearData() clears the current memory buffer and force the read() to load
774     * the data from file.
775     *
776     * @see #getData()
777     * @see #read()
778     */
779    public void clearData() {
780        isDataLoaded = false;
781    }
782
783    /**
784     * Returns the dimension size of the vertical axis.
785     *
786     * <p>
787     * This function is used by GUI applications such as HDFView. GUI
788     * applications display a dataset in a 2D table or 2D image. The display
789     * order is specified by the index array of selectedIndex as follow:
790     * <dl>
791     * <dt>selectedIndex[0] -- height</dt>
792     * <dd>The vertical axis</dd>
793     * <dt>selectedIndex[1] -- width</dt>
794     * <dd>The horizontal axis</dd>
795     * <dt>selectedIndex[2] -- depth</dt>
796     * <dd>The depth axis is used for 3 or more dimensional datasets.</dd>
797     * </dl>
798     * Applications can use getSelectedIndex() to access and change the display
799     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
800     * following code will set the height=200 and width=50.
801     *
802     * <pre>
803     * int[] selectedIndex = dataset.getSelectedIndex();
804     * selectedIndex[0] = 0;
805     * selectedIndex[1] = 1;
806     * </pre>
807     *
808     * @see #getSelectedIndex()
809     * @see #getWidth()
810     *
811     * @return the size of dimension of the vertical axis.
812     */
813    public final int getHeight() {
814        if (rank < 0) init();
815
816        if ((selectedDims == null) || (selectedIndex == null)) {
817            return 0;
818        }
819
820        return (int) selectedDims[selectedIndex[0]];
821    }
822
823    /**
824     * Returns the size of dimension of the horizontal axis.
825     *
826     * <p>
827     * This function is used by GUI applications such as HDFView. GUI
828     * applications display dataset a 2D Table or 2D Image. The display order is
829     * specified by the index array of selectedIndex as follow:
830     * <dl>
831     * <dt>selectedIndex[0] -- height</dt>
832     * <dd>The vertical axis</dd>
833     * <dt>selectedIndex[1] -- width</dt>
834     * <dd>The horizontal axis</dd>
835     * <dt>selectedIndex[2] -- depth</dt>
836     * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd>
837     * </dl>
838     * Applications can use getSelectedIndex() to access and change the display
839     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
840     * following code will set the height=200 and width=100.
841     *
842     * <pre>
843     * int[] selectedIndex = dataset.getSelectedIndex();
844     * selectedIndex[0] = 0;
845     * selectedIndex[1] = 1;
846     * </pre>
847     *
848     * @see #getSelectedIndex()
849     * @see #getHeight()
850     *
851     * @return the size of dimension of the horizontal axis.
852     */
853    public final int getWidth() {
854        if (rank < 0) init();
855
856        if ((selectedDims == null) || (selectedIndex == null)) {
857            return 0;
858        }
859
860        if ((selectedDims.length < 2) || (selectedIndex.length < 2)) {
861            return 1;
862        }
863
864        return (int) selectedDims[selectedIndex[1]];
865    }
866
867    /**
868     * Returns the indices of display order.
869     * <p>
870     *
871     * selectedIndex[] is provided for two purpose:
872     * <OL>
873     * <LI>
874     * selectedIndex[] is used to indicate the order of dimensions for display.
875     * selectedIndex[0] is for the row, selectedIndex[1] is for the column and
876     * selectedIndex[2] for the depth.
877     * <p>
878     * For example, for a four dimesion dataset, if selectedIndex[] = {1, 2, 3},
879     * then dim[1] is selected as row index, dim[2] is selected as column index
880     * and dim[3] is selected as depth index.
881     * <LI>
882     * selectedIndex[] is also used to select dimensions for display for
883     * datasets with three or more dimensions. We assume that applications such
884     * as HDFView can only display data values up to three dimension (2D
885     * spreadsheet/image with a third dimension which the 2D spreadsheet/image
886     * is selected from). For dataset with more than three dimensions, we need
887     * selectedIndex[] to tell applications which three dimensions are chosen
888     * for display. <br>
889     * For example, for a four dimesion dataset, if selectedIndex[] = {1, 2, 3},
890     * then dim[1] is selected as row index, dim[2] is selected as column index
891     * and dim[3] is selected as depth index. dim[0] is not selected. Its
892     * location is fixed at 0 by default.
893     * </OL>
894     *
895     * @return the array of the indices of display order.
896     */
897    public final int[] getSelectedIndex() {
898        if (rank < 0) init();
899
900        return selectedIndex;
901    }
902
903    /**
904     * Returns the string representation of compression information.
905     * <p>
906     * For example,
907     * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED".
908     *
909     * @return the string representation of compression information.
910     */
911    public final String getCompression() {
912        if (rank < 0) init();
913
914        return compression;
915    }
916
917    /**
918     * Returns the string representation of filter information.
919     *
920     * @return the string representation of filter information.
921     */
922    public final String getFilters() {
923        if (rank < 0) init();
924
925        return filters;
926    }
927
928    /**
929     * Returns the string representation of storage information.
930     *
931     * @return the string representation of storage information.
932     */
933    public final String getStorage() {
934        if (rank < 0) init();
935
936        return storage;
937    }
938
939    /**
940     * Returns the array that contains the dimension sizes of the chunk of the
941     * dataset. Returns null if the dataset is not chunked.
942     *
943     * @return the array of chunk sizes or returns null if the dataset is not
944     *         chunked.
945     */
946    public final long[] getChunkSize() {
947        if (rank < 0) init();
948
949        return chunkSize;
950    }
951
952    /**
953     * @deprecated Not for public use in the future. <br>
954     *             Using {@link #convertFromUnsignedC(Object, Object)}
955     *
956     * @param data_in  the object data
957     *
958     * @return the converted object
959     */
960    @Deprecated
961    public static Object convertFromUnsignedC(Object data_in) {
962        return Dataset.convertFromUnsignedC(data_in, null);
963    }
964
965    /**
966     * Converts one-dimension array of unsigned C-type integers to a new array
967     * of appropriate Java integer in memory.
968     * <p>
969     * Since Java does not support unsigned integer, values of unsigned C-type
970     * integers must be converted into its appropriate Java integer. Otherwise,
971     * the data value will not displayed correctly. For example, if an unsigned
972     * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of
973     * the correct value of 200.
974     * <p>
975     * Unsigned C integers are upgrade to Java integers according to the
976     * following table:
977     * <TABLE CELLSPACING=0 BORDER=1 CELLPADDING=5 WIDTH=400>
978     * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption>
979     * <TR>
980     * <TD><B>Unsigned C Integer</B></TD>
981     * <TD><B>JAVA Intege</B>r</TD>
982     * </TR>
983     * <TR>
984     * <TD>unsigned byte</TD>
985     * <TD>signed short</TD>
986     * </TR>
987     * <TR>
988     * <TD>unsigned short</TD>
989     * <TD>signed int</TD>
990     * </TR>
991     * <TR>
992     * <TD>unsigned int</TD>
993     * <TD>signed long</TD>
994     * </TR>
995     * <TR>
996     * <TD>unsigned long</TD>
997     * <TD>signed long</TD>
998     * </TR>
999     * </TABLE>
1000     * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers.
1001     * Therefore, the values of unsigned 64-bit dataset may be wrong in Java
1002     * application</strong>.
1003     * <p>
1004     * If memory data of unsigned integers is converted by
1005     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
1006     * the data back to unsigned C before data is written into file.
1007     *
1008     * @see #convertToUnsignedC(Object, Object)
1009     *
1010     * @param data_in
1011     *            the input 1D array of the unsigned C-type integers.
1012     * @param data_out
1013     *            the output converted (or upgraded) 1D array of Java integers.
1014     *
1015     * @return the upgraded 1D array of Java integers.
1016     */
1017    public static Object convertFromUnsignedC(Object data_in, Object data_out) {
1018        if (data_in == null) {
1019            return null;
1020        }
1021
1022        Class data_class = data_in.getClass();
1023        if (!data_class.isArray()) {
1024            return null;
1025        }
1026
1027        if (data_out != null) {
1028            Class data_class_out = data_out.getClass();
1029            if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) {
1030                data_out = null;
1031            }
1032        }
1033
1034        String cname = data_class.getName();
1035        char dname = cname.charAt(cname.lastIndexOf("[") + 1);
1036        int size = Array.getLength(data_in);
1037        log.trace("convertFromUnsignedC: cname={} dname={} size={}", cname, dname, size);
1038
1039        if (dname == 'B') {
1040            short[] sdata = null;
1041            if (data_out == null) {
1042                sdata = new short[size];
1043            }
1044            else {
1045                sdata = (short[]) data_out;
1046            }
1047
1048            byte[] bdata = (byte[]) data_in;
1049            for (int i = 0; i < size; i++) {
1050                sdata[i] = (short) ((bdata[i] + 256) & 0xFF);
1051            }
1052
1053            data_out = sdata;
1054        }
1055        else if (dname == 'S') {
1056            int[] idata = null;
1057            if (data_out == null) {
1058                idata = new int[size];
1059            }
1060            else {
1061                idata = (int[]) data_out;
1062            }
1063
1064            short[] sdata = (short[]) data_in;
1065            for (int i = 0; i < size; i++) {
1066                idata[i] = (sdata[i] + 65536) & 0xFFFF;
1067            }
1068
1069            data_out = idata;
1070        }
1071        else if (dname == 'I') {
1072            long[] ldata = null;
1073            if (data_out == null) {
1074                ldata = new long[size];
1075            }
1076            else {
1077                ldata = (long[]) data_out;
1078            }
1079
1080            int[] idata = (int[]) data_in;
1081            for (int i = 0; i < size; i++) {
1082                ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL;
1083            }
1084
1085            data_out = ldata;
1086        }
1087        else {
1088            data_out = data_in;
1089            log.debug("convertFromUnsignedC: Java does not support unsigned long");
1090        }
1091
1092        return data_out;
1093    }
1094
1095    /**
1096     * @deprecated Not for public use in the future. <br>
1097     *             Using {@link #convertToUnsignedC(Object, Object)}
1098     *
1099     * @param data_in
1100     *            the input 1D array of the unsigned C-type integers.
1101     *
1102     * @return the upgraded 1D array of Java integers.
1103     */
1104    @Deprecated
1105    public static Object convertToUnsignedC(Object data_in) {
1106        return Dataset.convertToUnsignedC(data_in, null);
1107    }
1108
1109    /**
1110     * Converts the array of converted unsigned integer back to unsigned C-type
1111     * integer data in memory.
1112     * <p>
1113     * If memory data of unsigned integers is converted by
1114     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
1115     * the data back to unsigned C before data is written into file.
1116     *
1117     * @see #convertFromUnsignedC(Object, Object)
1118     *
1119     * @param data_in
1120     *            the input array of the Java integer.
1121     * @param data_out
1122     *            the output array of the unsigned C-type integer.
1123     *
1124     * @return the converted data of unsigned C-type integer array.
1125     */
1126    public static Object convertToUnsignedC(Object data_in, Object data_out) {
1127        if (data_in == null) {
1128            return null;
1129        }
1130
1131        Class data_class = data_in.getClass();
1132        if (!data_class.isArray()) {
1133            return null;
1134        }
1135
1136        if (data_out != null) {
1137            Class data_class_out = data_out.getClass();
1138            if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) {
1139                data_out = null;
1140            }
1141        }
1142
1143        String cname = data_class.getName();
1144        char dname = cname.charAt(cname.lastIndexOf("[") + 1);
1145        int size = Array.getLength(data_in);
1146        log.trace("convertToUnsignedC: cname={} dname={} size={}", cname, dname, size);
1147
1148        if (dname == 'S') {
1149            byte[] bdata = null;
1150            if (data_out == null) {
1151                bdata = new byte[size];
1152            }
1153            else {
1154                bdata = (byte[]) data_out;
1155            }
1156            short[] sdata = (short[]) data_in;
1157            for (int i = 0; i < size; i++) {
1158                bdata[i] = (byte) sdata[i];
1159            }
1160            data_out = bdata;
1161        }
1162        else if (dname == 'I') {
1163            short[] sdata = null;
1164            if (data_out == null) {
1165                sdata = new short[size];
1166            }
1167            else {
1168                sdata = (short[]) data_out;
1169            }
1170            int[] idata = (int[]) data_in;
1171            for (int i = 0; i < size; i++) {
1172                sdata[i] = (short) idata[i];
1173            }
1174            data_out = sdata;
1175        }
1176        else if (dname == 'J') {
1177            int[] idata = null;
1178            if (data_out == null) {
1179                idata = new int[size];
1180            }
1181            else {
1182                idata = (int[]) data_out;
1183            }
1184            long[] ldata = (long[]) data_in;
1185            for (int i = 0; i < size; i++) {
1186                idata[i] = (int) ldata[i];
1187            }
1188            data_out = idata;
1189        }
1190        else {
1191            data_out = data_in;
1192            log.debug("convertToUnsignedC: Java does not support unsigned long");
1193        }
1194
1195        return data_out;
1196    }
1197
1198    /**
1199     * Converts an array of bytes into an array of Strings for a fixed string
1200     * dataset.
1201     * <p>
1202     * A C-string is an array of chars while an Java String is an object. When a
1203     * string dataset is read into Java application, the data is stored in an
1204     * array of Java bytes. byteToString() is used to convert the array of bytes
1205     * into array of Java strings so that applications can display and modify
1206     * the data content.
1207     * <p>
1208     * For example, the content of a two element C string dataset is {"ABC",
1209     * "abc"}. Java applications will read the data into an byte array of {65,
1210     * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java
1211     * String of strs[0]="ABC", and strs[1]="abc".
1212     * <p>
1213     * If memory data of strings is converted to Java Strings, stringToByte()
1214     * must be called to convert the memory data back to byte array before data
1215     * is written to file.
1216     *
1217     * @see #stringToByte(String[], int)
1218     *
1219     * @param bytes
1220     *            the array of bytes to convert.
1221     * @param length
1222     *            the length of string.
1223     *
1224     * @return the array of Java String.
1225     */
1226    public static final String[] byteToString(byte[] bytes, int length) {
1227        if (bytes == null) {
1228            return null;
1229        }
1230
1231        int n = bytes.length / length;
1232        log.trace("byteToString: n={} from length of {}", n, length);
1233        // String bigstr = new String(bytes);
1234        String[] strArray = new String[n];
1235        String str = null;
1236        int idx = 0;
1237        for (int i = 0; i < n; i++) {
1238            str = new String(bytes, i * length, length);
1239            // bigstr.substring uses less memory space
1240            // NOTE: bigstr does not work on linux if bytes.length is very large
1241            // see bug 1091
1242            // offset = i*length;
1243            // str = bigstr.substring(offset, offset+length);
1244
1245            idx = str.indexOf('\0');
1246            if (idx > 0) {
1247                str = str.substring(0, idx);
1248            }
1249
1250            // trim only the end
1251            int end = str.length();
1252            while (end > 0 && str.charAt(end - 1) <= '\u0020')
1253                end--;
1254
1255            strArray[i] = (end <= 0) ? "" : str.substring(0, end);
1256
1257            // trim both start and end
1258            // strArray[i] = str.trim();
1259        }
1260
1261        return strArray;
1262    }
1263
1264    /**
1265     * Converts a string array into an array of bytes for a fixed string
1266     * dataset.
1267     * <p>
1268     * If memory data of strings is converted to Java Strings, stringToByte()
1269     * must be called to convert the memory data back to byte array before data
1270     * is written to file.
1271     *
1272     * @see #byteToString(byte[] bytes, int length)
1273     *
1274     * @param strings
1275     *            the array of string.
1276     * @param length
1277     *            the length of string.
1278     *
1279     * @return the array of bytes.
1280     */
1281    public static final byte[] stringToByte(String[] strings, int length) {
1282        if (strings == null) {
1283            return null;
1284        }
1285
1286        int size = strings.length;
1287        byte[] bytes = new byte[size * length];
1288        log.trace("stringToByte: size={} length={}", size, length);
1289        StringBuffer strBuff = new StringBuffer(length);
1290        for (int i = 0; i < size; i++) {
1291            // initialize the string with spaces
1292            strBuff.replace(0, length, " ");
1293
1294            if (strings[i] != null) {
1295                if (strings[i].length() > length) {
1296                    strings[i] = strings[i].substring(0, length);
1297                }
1298                strBuff.replace(0, length, strings[i]);
1299            }
1300
1301            strBuff.setLength(length);
1302            System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length);
1303        }
1304
1305        return bytes;
1306    }
1307
1308    /**
1309     * Returns the array of strings that represent the dimension names. Returns
1310     * null if there is no dimension name.
1311     * <p>
1312     * Some datasets have pre-defined names for each dimension such as
1313     * "Latitude" and "Longitude". getDimNames() returns these pre-defined
1314     * names.
1315     *
1316     * @return the names of dimensions, or null if there is no dimension name.
1317     */
1318    public final String[] getDimNames() {
1319        if (rank < 0) init();
1320
1321        return dimNames;
1322    }
1323
1324    /**
1325     * Checks if a given datatype is a string. Sub-classes must replace this
1326     * default implementation.
1327     *
1328     * @param tid
1329     *            The data type identifier.
1330     *
1331     * @return true if the datatype is a string; otherwise returns false.
1332     */
1333    public boolean isString(int tid) {
1334        return false;
1335    }
1336
1337    /**
1338     * Returns the size in bytes of a given datatype. Sub-classes must replace
1339     * this default implementation.
1340     *
1341     * @param tid
1342     *            The data type identifier.
1343     *
1344     * @return The size of the datatype
1345     */
1346    public int getSize(int tid) {
1347        return -1;
1348    }
1349
1350    /**
1351     * Get flag that indicate if enum data is converted to strings.
1352     *
1353     * @return the enumConverted
1354     */
1355    public boolean isEnumConverted() {
1356        return enumConverted;
1357    }
1358
1359    /**
1360     * Set flag that indicate if enum data is converted to strings.
1361     *
1362     * @param b
1363     *            the enumConverted to set
1364     */
1365    public void setEnumConverted(boolean b) {
1366        if (enumConverted != b) {
1367            originalBuf = convertedBuf = null;
1368            this.clearData();
1369        }
1370
1371        enumConverted = b;
1372    }
1373
1374    /**
1375     * Get Class of the original data buffer if converted.
1376     *
1377     * @return the Class of originalBuf
1378     */
1379    public final Class getOriginalClass() {
1380        return originalBuf.getClass();
1381    }
1382}