The following describes the data description language (DDL) for an HDF5 file. The description is in Backus-Naur Form (BNF). A quick explanation of some of the symbols used follows.
::= defined as
<tname> a token with the name tname
<a> | <b> one of <a> or <b>
<a>opt zero or one occurrence of <a>
<a>* zero or more occurrences of <a>
<a>+ one or more occurrences of <a>
TBD To Be Decided
<file> ::= hdf5 "<file_name>" { <file_boot_block>opt <root_group> }
<file_name> ::= <identifier>
<file_boot_block> ::= boot_block { <boot_block_content> }
<boot_block_content> ::= TBD
<root_group> ::= group "/" { <group_attribute>* <group_member>* }
// Attributes and members can be in any order.
<group_attribute> ::= <attribute>
<attribute> ::= attribute "<attr_name>" { <datatype>
<dataspace>
<data>opt }
// <datatype>, <dataspace> and <data> can be in any order
// as long as <data> comes last.
<attr_name> ::= <identifier>
<datatype> ::= datatype "<datatype_name>" ; | // shared data type
datatype { <scalar_datatype> } ; |
datatype { <dt_definition>+ } ;
<datatype_name> ::= <identifier>
<scalar_datatype> ::= int8 | uint8 | ... // data types supported by HDF5
<dt_definition> ::= "<datatype_name>" <field_name> ; | <scalar_datatype> <field_name> ;
<field_name> ::= one or more field names separated by ","
<dataspace> ::= dataspace "<dataspace_name>" ; |
dataspace {array <current_dims> <max_dims>} ; |
dataspace {other <ds_definition>+ } ;
<dataspace_name> ::= <identifier>
<current_dims> ::= a list of integers represented in the form ( , , .. , )
<max_dims> ::= a list of integers or H5S_UNLIMITED represented in the form ( , , .. , )
<ds_definition> ::= TBD
<data> ::= data {TBD} ;
<group_member> ::= <named_datatype> | <named_dataspace> | <group> | <dataset> | <link>
<named_datatype> ::= datatype "<datatype_name>" { <scalar_datatype> } |
datatype "<datatype_name>" { <dt_definition>+ }
<named_dataspace> ::= dataspace "<dataspace_name>" {array <current_dims> <max_dims> } |
dataspace "<dataspace_name>" { other <ds_definition>+ }
<group> ::= group "<group_name>" { <group_attribute>* <group_member>* }
// Attributes and members can be in any order.
<group_name> ::= <identifier>
<dataset> ::= dataset "<dataset_name>" { <datatype>
<dataspace>
<storagelayout>opt
<compression>opt
<dataset_attribute>*
<data>opt }
// Any order is ok as long as <data> and <dataset_attribute> are
// after <datatype> and <dataspace>.
<dataset_name> ::= <identifier>
<storagelayout> ::= storagelayout <contiguous_layout> ; |
storagelayout <chunked_layout> ; |
storagelayout <compact_layout> ; |
storagelayout <external_layout> ;
<contiguous_layout> ::= {contiguous} // default
<chunked_layout> ::= {chunked <dims> }
<dims> ::= a list of integers represented in the form ( , , .. , )
<compact_layout> ::= TBD // not implemented yet
<external_layout> ::= {external <external_file>+ }
<external_file> ::= (<file_name> <offset> <size>)
<offset> ::= an integer
<size> ::= an integer
<compression> ::= compression { TBD } ; // algorithm name and properties?
<dataset_attribute> ::= <attribute> ;
<link> ::= <hardlink> | <softlink>
<hardlink> ::= hardlink "<hardlink_name>" { linktarget "<target>"; }
<hardlink_name> ::= <identifier>
<target> ::= <identifier>
<softlink> ::= softlink "<softlink_name>" { linktarget "<target>"; }
<softlink_name> ::= <identifier>
<identifier> ::= string
// The character "/" should be used with care.
// For example, <dataset_name> cannot contain "/".
Consider an HDF5 file, example.h5, with the following structure:
                                     /
                      /              |     \
             group1                group3   dataset3
           /        \                |       ^
      group2        dataset2      hardlink --|
     /      \           ^
dataset1   softlink     |
                 |------|
hdf5 "example" {
group "/" {
datatype "shared_datatype1" { // named data type
char name[20], address[40];
int32 id;
}
datatype "shared_datatype2" {
"shared_datatype1" rec[100];
float32 total;
}
dataspace "shared_dataspace1" { array (1000) (1000)} // named data space
attribute "group_attribute1" { //group attribute
datatype {int32};
dataspace "shared_dataspace1";
}
attribute "group_attribute2" {
datatype {int32};
dataspace {array (3,5) (3,5)} ;
data {(0,0,0,0,0) (1,1,1,1,1) (2,2,2,2,2)};
}
group "group1" {
attribute "group1_attribute" {
datatype {char};
dataspace {array (100) (200)};
data {("attribute_of_group_1")};
}
group "group2" {
datatype "shared_datatype3" {
int8 z;
int32 x;
}
dataset "dataset1" {
dataspace "shared_dataspace1";
datatype "shared_datatype3";
attribute "dataset1_attr" {
datatype {char};
dataspace {array (10) (10)};
data {("data1")};
};
}
softlink "sl" {
linktarget "/group1/dataset2";
}
}
dataset "dataset2" {
datatype {int8};
dataspace {array (2,5) (H5S_UNLIMITED, 5)};
data {(0,0,0,0,0) (0,0,0,0,0)};
}
}
group "group3" {
hardlink "hl" {
linktarget "/dataset3" ;
}
}
dataset "dataset3" {
datatype { float32 };
dataspace {array (1000,1000) (1000,1000) };
storagelayout {chunked (250,1000)};
}
} // root group
}