The following describes the data description language (DDL) for HDF5 files. The description is in Backus-Naur Form (BNF). A quick explanation of some of the symbols used:
::= defined as
<tname> a token with the name tname
<a> | <b> one of <a> or <b>
<a>opt zero or one occurrence of <a>
<a>* zero or more occurrences of <a>
<a>+ one or more occurrences of <a>
TBD To Be Decided
<file> ::= hdf5 "<file_name>" { <file_boot_block>opt <root_group> }
<file_name> ::= <identifier>
<file_boot_block> ::= boot_block { <boot_block_content> }
<boot_block_content> ::= TBD
<root_group> ::= group "/" { <nlink>opt <group_attribute>* <group_member>* }
// nlink must appear before attributes and members.
<nlink> ::= { nlink <no_of_hardlinks> }
<no_of_hardlinks> ::= an integer larger than 1
<group_attribute> ::= <attribute>
<attribute> ::= attribute "<attr_name>" { <datatype>
<dataspace>
<data>opt }
// <datatype> and <dataspace> can appear in either order,
// as long as <data>, if present, comes last.
<attr_name> ::= <identifier>
<datatype> ::= datatype { "<datatype_name>" } | // shared data type
datatype { <scalar_datatype> } |
datatype { <dt_definition>+ }
<datatype_name> ::= <identifier>
<scalar_datatype> ::= H5T_NATIVE_CHAR | H5T_NATIVE_INT | ... // data types supported by HDF5
<dt_definition> ::= <scalar_datatype> <field_name> ;
<field_name> ::= <identifier>
<dataspace> ::= dataspace { "<dataspace_name>" } |
dataspace { array <current_dims> <max_dims> } |
dataspace { other <ds_definition>+ }
<dataspace_name> ::= <identifier>
<current_dims> ::= a list of integers represented as (i1, i2, ... ) where ik is an integer, k = 1,2,...
<max_dims> ::= (i1, i2, ... ) where ik is an integer or H5S_UNLIMITED
<ds_definition> ::= TBD
<data> ::= data {TBD}
<group_member> ::= <named_datatype> | <named_dataspace> | <group> | <dataset> | <softlink>
<named_datatype> ::= datatype "<datatype_name>" { <dt_definition>+ }
<named_dataspace> ::= dataspace "<dataspace_name>" { array <current_dims> <max_dims> } |
dataspace "<dataspace_name>" { other <ds_definition>+ }
<group> ::= group "<group_name>" { <nlink>opt <group_attribute>* <group_member>* }
<group_name> ::= <identifier>
<dataset> ::= dataset "<dataset_name>" { <nlink>opt
<datatype>
<dataspace>
<storagelayout>opt
<compression>opt
<dataset_attribute>*
<data>opt }
// Any order is ok as long as <data> and <dataset_attribute> are
// after <datatype> and <dataspace>.
// nlink must appear before others.
<dataset_name> ::= <identifier>
<storagelayout> ::= storagelayout <contiguous_layout> |
storagelayout <chunked_layout> |
storagelayout <compact_layout> |
storagelayout <external_layout>
<contiguous_layout> ::= {contiguous} // default
<chunked_layout> ::= {chunked <dims> }
<dims> ::= (i1, i2, ... ) where ik is an integer, k = 1,2,...
<compact_layout> ::= TBD
<external_layout> ::= {external <external_file>+ }
<external_file> ::= (<file_name> <offset> <size>)
<offset> ::= an integer
<size> ::= an integer
<compression> ::= compression { TBD }
<dataset_attribute> ::= <attribute>
<softlink> ::= softlink "<softlink_name>" { linktarget "<target>" }
<softlink_name> ::= <identifier>
<target> ::= <identifier>
<identifier> ::= string
// The character "/" should be used with care.
// For example, <dataset_name> cannot contain "/".
Consider an HDF5 file, example.h5, with the following structure:
                    /
             /      |       \
        group1    group3    dataset3
       /     \      |
  group2   dataset2 softlink
   /
dataset1
hdf5 "example" {
    group "/" {
        datatype "shared_datatype" { // named data type
            H5T_NATIVE_CHAR c[20];
            H5T_NATIVE_INT i;
        }
        attribute "group_attribute" { //attribute
            datatype {H5T_NATIVE_CHAR}
            dataspace {array (20) (20)}
            data { "group attribute" }
        }
        group "group1" {
            group "group2" {
                dataset "dataset1" {
                    dataspace {array (4) (4)}
                    datatype {"shared_datatype"}
                    attribute "dataset1_attr" {
                        datatype {H5T_NATIVE_CHAR}
                        dataspace {array (10) (10)}
                    }
                }
            }
            dataset "dataset2" {
                datatype {H5T_NATIVE_INT}
                dataspace {array (2,5) (H5S_UNLIMITED, 5)}
                data {0,0,0,0,0,0,0,0,0,0}
            }
        }
        group "group3" {
            softlink "slink" {
                linktarget "/sometarget"
            }
        }
        dataset "dataset3" {
            datatype {H5T_NATIVE_FLOAT}
            dataspace {array (100,100) (100,100)}
        }
    }
}