DDL for HDF5

DDL in BNF

The following describes the data description language (DDL) for HDF5 files. The description is in Backus-Naur Form (BNF). A quick explanation of some of the symbols used:

	::=               defined as
	<tname>           a token with the name tname
	<a> | <b>         one of <a> or <b>
	<a>opt             zero or one occurrence of <a>
	<a>*              zero or more occurrences of <a>
	<a>+              one or more occurrences of <a>
        TBD               To Be Decided


<file> ::= hdf5 "<file_name>" { <file_boot_block>opt <root_group> }

<file_name> ::= <identifier>

<file_boot_block> ::= boot_block { <boot_block_content> }

<boot_block_content> ::= TBD

<root_group> ::= group "/" { <group_attribute>* <group_member>* }  
// Attributes and members can be in any order.

<group_attribute> ::= <attribute> 

<attribute> ::= attribute "<attr_name>" { <datatype>
                                          <dataspace>
                                          <data>opt  }
// <datatype>, <dataspace> and <data> can be in any order
// as long as <data> comes last.

<attr_name> ::=  <identifier>

<datatype> ::= datatype "<datatype_name>" ; |          // shared data type
               datatype { <scalar_datatype> } ; |         
               datatype { <dt_definition>+ } ;       

<datatype_name> ::= <identifier>

<scalar_datatype> ::=  int8 | uint8 | ...     // data types supported by HDF5

<dt_definition> ::= "<datatype_name>" <field_name> ; | <scalar_datatype> <field_name> ;

<field_name> ::= one or more field names separated by ","

<dataspace> ::= dataspace "<dataspace_name>" ; |
                dataspace {array <current_dims> <max_dims>} ; | 
                dataspace {other <ds_definition>+ } ;         

<dataspace_name> ::= <identifier>

<current_dims> ::= a list of integers represented in the form ( , , .. , )

<max_dims> ::=  a list of integers or H5S_UNLIMITED represented in the form ( , , .. , )

<ds_definition> ::= TBD

<data> ::= data {TBD} ;

<group_member> ::= <named_datatype> | <named_dataspace> | <group> | <dataset> | <link>

<named_datatype> ::= datatype "<datatype_name>" { <scalar_datatype> } |
                     datatype "<datatype_name>" { <dt_definition>+ }

<named_dataspace> ::= dataspace "<dataspace_name>" {array <current_dims> <max_dims> } |
                     dataspace "<dataspace_name>" { other <ds_definition>+ }

<group> ::= group "<group_name>" { <group_attribute>* <group_member>* }  
// Attributes and members can be in any order.

<group_name> ::= <identifier>

<dataset> ::= dataset "<dataset_name>" { <datatype>  
                                         <dataspace> 
                                         <storagelayout>opt
                                         <compression>opt
                                         <dataset_attribute>*
                                         <data>opt  } 
// Any order is ok as long as <data> and <dataset_attribute> are 
// after <datatype> and <dataspace>.

<dataset_name> ::= <identifier>

<storagelayout> ::= storagelayout <contiguous_layout> ; |
                    storagelayout <chunked_layout> ; |
                    storagelayout <compact_layout> ; |
                    storagelayout <external_layout> ;

<contiguous_layout> ::= {contiguous}    // default

<chunked_layout> ::=  {chunked <dims> }

<dims> ::=  a list of integers represented in the form ( , , .. , )

<compact_layout> ::= TBD            // not implemented yet

<external_layout> ::= {external <external_file>+ }

<external_file> ::= (<file_name> <offset> <size>) 

<offset> ::= an integer

<size> ::= an integer

<compression> ::= compression { TBD } ; // algorithm name and properties?

<dataset_attribute> ::= <attribute> ;

<link> ::= <hardlink> | <softlink>

<hardlink> ::= hardlink "<hardlink_name>" { linktarget "<target>"; }

<hardlink_name> ::= <identifier>

<target> ::= <identifier>

<softlink> ::= softlink "<softlink_name>" { linktarget "<target>"; }

<softlink_name> ::= <identifier>

<identifier> ::= string   
// character "/" should be used with care. 
// <dataset_name> cannot contain "/", for example.

An example of HDF5 File in DDL


Consider an HDF5 file, example.h5, with the following structure:

                             /

                     /       |       \
                 group1     group3  dataset3
                 /   \       |          ^
           group2  dataset2  hardlink --|
             /  \        ^
      dataset1 softlink  |
                  |------|


hdf5 "example" { 

group "/" {

   datatype "shared_datatype1" {                    // named data type
            char name[20], address[40];
            int32 id; 
   }
 
   datatype "shared_datatype2" {
            "shared_datatype1" rec[100];
            float32 total;
   }

   dataspace "shared_dataspace1" { array (1000) (1000)}         // named data space



   attribute "group_attribute1" {                   //group attribute
             datatype {int32};
             dataspace "shared_dataspace1";
   } 

   attribute "group_attribute2" {
             datatype {int32};
             dataspace {array (3,5) (3,5)} ;
             data {(0,0,0,0,0) (1,1,1,1,1) (2,2,2,2,2)};
   } 


   group "group1" {

         attribute "group1_attribute" {
                   datatype {char};
                   dataspace {array (100) (200)};
                   data {("attribute_of_group_1")};
         }

         group "group2" {

               datatype "shared_datatype3" {
                         int8 z;
                         int32 x; 
               }

               dataset "dataset1" {
                        dataspace "shared_dataspace1";
                        datatype "shared_datatype3";
                        attribute "dataset1_attr" {
                                   datatype {char};
                                   dataspace {array (10) (10)};
                                   data {("data1")};
                        };
               }
 
 
               softlink "sl" {
                        linktarget "/group1/dataset2";
               } 
         }

         dataset "dataset2" {
               datatype {int8};
               dataspace {array (2,5) (H5S_UNLIMITED, 5)};
               data {(0,0,0,0,0) (0,0,0,0,0)};
         }
   }
   
   group "group3" {
         hardlink "hl" {
                  linktarget "/dataset3" ;
         }
   }
      
   
   dataset "dataset3" {
           datatype { float32 };
           dataspace {array (1000,1000) (1000,1000) };
           storagelayout {chunked (250,1000)};
   }
 
} // root group

}