<?xml version="1.0" encoding="UTF-8"?>
<!-- FIGIS project DTD: Time Series Metadata module -->
<!-- ============================================================== -->
<!-- Project: Time Series Universal Import Export -->
<!-- Component: Time Series Metadata module-->
<!-- Original author: Yves Jaques -->
<!-- Organisation: FAO of the UN, FIDI, FIGIS project -->
<!-- Date: 07/09/2001 -->
<!-- Modifications: -->
<!-- Date			Author					Note -->
<!-- =======		============		===================================== -->
<!-- dd/mm/yy	who?					why? -->
<!--                                                           -->
<!-- dd/10/2001   Yury Shatz                  ver. 2-->
<!-- dd/01/2002   Yves Jaques                  ver. 2.5, updates after various inputs from Taconet and meetings.-->
<!--============================================================== -->
<!--This DTD contains the structures that hold the descriptions of the structures that hold the actual data-->
<!-- ============================================================== -->
<!-- FigisTimeSeriesTagLib is an external parameter entity that pulls in the tag
     library of common declarations shared by the various Time Series DTDs.
     It is located through the public identifier below or, failing that, through
     the relative system identifier "TSTagLib1.0.dtd". -->
<!ENTITY % FigisTimeSeriesTagLib PUBLIC
	"Figis Time Series Import Export Tag Library 1.0"
	"TSTagLib1.0.dtd">
<!-- TagLibInclude is set to INCLUDE before the library is expanded.
     NOTE(review): presumably it switches on a conditional section inside the
     tag library; confirm against TSTagLib1.0.dtd. -->
<!ENTITY % TagLibInclude 'INCLUDE'>
<!-- Expand the library here. The REM and REF parameter entities and elements
     such as NAME_LIST, OBJ_TYPE_REF, REL_REF, OBJ_REF, ID and FOREIGN_ID are
     used below but not declared in this file, so they are expected to come
     from this library. -->
%FigisTimeSeriesTagLib;
<!-- ============================================================== -->
<!-- TS_METADATA: root container for the metadata of one time series dataset.
     It describes the structure of the dataset, its keys and its time series.
     Children, in this order: NAME_LIST, KEY_LIST, optional UNIT_LIST,
     optional SYMBOL_LIST, TIME_AXIS, optional VARIABLE_LIST.
     Attributes: FAMILY and SHORT_NAME are optional free text;
     ACTIVE_DATASET is yes or no and defaults to "yes". -->
<!ELEMENT TS_METADATA (
	NAME_LIST,
	KEY_LIST,
	UNIT_LIST?,
	SYMBOL_LIST?,
	TIME_AXIS,
	VARIABLE_LIST?
)>
<!ATTLIST TS_METADATA
	FAMILY	CDATA	#IMPLIED
	SHORT_NAME	CDATA	#IMPLIED
	ACTIVE_DATASET	(yes | no)	"yes"
	%REM;
>
<!--NAME_LIST contains the NAMEs for the dataset. -->
<!--KEY_LIST contains all the table KEYS in the DATASET -->
<!--UNIT_LIST contains all the measurement UNITs for the DATASET. -->
<!--SYMBOL_LIST contains all the SYMBOLs (flags that qualify data) for the DATASET. -->
<!-- TIME_AXIS describes time axis of the dataset -->
<!--VARIABLE_LIST contains the VARIABLES (aka series, parameters) for the DATASET. -->
<!--FAMILY is the name of the group (if any) to which the DATASET is associated. -->
<!--SHORT_NAME is an abbreviated convenience name for the DATASET. -->
<!--ACTIVE_DATASET indicates whether or not the dataset is active or inactive (discontinued.) -->
<!-- Example: too large to show physical example. A theoretical example would be  a set of names for the dataset, followed by a list of keys each containing countries or species together with their types and unit values for the data. -->
<!-- ==============================================================-->
<!-- KEY_LIST: holds every table KEY of a dataset; at least one KEY is required.
     NUM_KEYS (required) is meant to carry the total number of keys in the
     dataset. It is plain CDATA, so the DTD does not check it against the
     actual number of KEY children. -->
<!ELEMENT KEY_LIST (KEY)+>
<!ATTLIST KEY_LIST
	NUM_KEYS	CDATA	#REQUIRED
	%REM;
>
<!--KEY is the root container for metadata about a dataset key. -->
<!-- NUM_KEYS contains the number of keys found in the KEY_LIST container.
Example: -->
<!-- A group of Keys. -->
<!-- ==============================================================-->
<!-- KEY: metadata describing one key of the dataset. It carries no actual key
     values.
     Children, in order: optional NAME_LIST, one or more OBJ_TYPE_REF,
     zero or more REL_REF, optional FORMAT_REF_LIST, optional VALUE_FILTER. -->
<!ELEMENT KEY (
	NAME_LIST?,
	OBJ_TYPE_REF+,
	REL_REF*,
	FORMAT_REF_LIST?,
	VALUE_FILTER?
)>
<!ATTLIST KEY %REM; >
<!--NAME_LIST contains the NAME(s)  (if any) for the key. -->
<!--OBJ_TYPE_REF references the object classes that are found within the key. -->
<!--REL_REF lists the relationship(s) (if any) to other keys. -->
<!--optional FORMAT_REF_LIST lists the output format(s) for the key. -->
<!--optional VALUE_FILTER contains a list of values allowed for the key. These are
not values actually encountered but definition of all allowed values.
Import procedure may use this to filter the incoming data.
-->
<!-- Example: <KEY>
			<NAME_LIST>
				<NAME LANG="en">Country</NAME>
				<NAME LANG="fr">Pays</NAME>
			</NAME_LIST>

			<OBJ_TYPE_REF>
				<ID>Country</ID>
				<FOREIGN_ID ID_SCHEME="Fishstat">32</FOREIGN_ID>
				<FOREIGN_ID ID_SCHEME="Figis">13001</FOREIGN_ID>
			</OBJ_TYPE_REF>

			<REL_REF>
				<ID>CountryOnContinent</ID>
			</REL_REF>
		</KEY>
		-->
<!-- ==============================================================-->
<!-- VALUE_FILTER: the list of values allowed for a key, given as one or more
     CUST_GRP_REF, RELATION_FILTER or OBJ_REF children in any mix and order. -->
<!ELEMENT VALUE_FILTER (
	CUST_GRP_REF
	| RELATION_FILTER
	| OBJ_REF
)+>
<!ATTLIST VALUE_FILTER %REM; >
<!-- Allowed values are defined in one of three ways: -->
<!--CUST_GRP_REF: references custom groups. Keys in these 
groups are allowed -->
<!--RELATION_FILTER contains references to related objects. Any objects 
related to those listed (e.g. their children) are allowed-->
<!--OBJ_REF: as references to individual objects  -->
<!-- Example: -->
<!-- List of allowed values for species, including a custom group, 
two ISSCAAP groups and two individual species -->
<!--
<VALUE_FILTER>
	<CUST_GRP_REF><ID>MarineFishes</ID></CUST_GRP_REF>
	<RELATION_FILTER>
		<OBJ_TYPE_REF><ID>Species</ID></OBJ_TYPE_REF>
		<REL_REF><ID>SpeciesInIsscaapGroup</ID></REL_REF>
		<RELATED_OBJ_LIST>
			<OBJ_REF><ID>IsscaapGroup11</ID></OBJ_REF>
			<OBJ_REF><ID>IsscaapGroup12</ID></OBJ_REF>
		</RELATED_OBJ_LIST>
	</RELATION_FILTER>
	<OBJ_REF><ID>SpeciesABC</ID></OBJ_REF>
	<OBJ_REF><FOREIGN_ID ID_SCHEME="Figis">2124</FOREIGN_ID></OBJ_REF>
</VALUE_FILTER>
-->
<!-- ==============================================================-->
<!-- CUST_GRP_REF: reference to a custom group in the dataset, described in the
     original notes as holding the group's NAME and ID(s). Its content is one
     or more repetitions of the REF parameter entity, which is declared
     outside this file. -->
<!ELEMENT CUST_GRP_REF (%REF;)+>
<!ATTLIST CUST_GRP_REF %REM; >
<!-- Example: -->
<!-- see above -->
<!-- ==============================================================-->
<!-- RELATION_FILTER: selects a list of objects through the objects they are
     related to (usually groups). It holds exactly one each of OBJ_TYPE_REF,
     REL_REF and RELATED_OBJ_LIST, in that order. -->
<!ELEMENT RELATION_FILTER (
	OBJ_TYPE_REF,
	REL_REF,
	RELATED_OBJ_LIST
)>
<!ATTLIST RELATION_FILTER %REM; >
<!-- It consists of
- OBJ_TYPE_REF: object type we are selecting;
- REL_REF: relation we are using for selection;
- RELATED_OBJ_LIST: related objects.
-->
<!-- Example: -->
<!-- see above -->
<!-- ==============================================================-->
<!-- RELATED_OBJ_LIST: one or more OBJ_REF children, i.e. a list of references
     to objects (usually groups). -->
<!ELEMENT RELATED_OBJ_LIST (OBJ_REF)+>
<!ATTLIST RELATED_OBJ_LIST %REM; >
<!-- Example: see above in example for VALUE_FILTER -->
<!-- ==============================================================-->
<!-- FORMAT_REF_LIST: one or more FORMAT_REF children, each holding the ID(s)
     of a format string for a key. -->
<!ELEMENT FORMAT_REF_LIST (FORMAT_REF)+>
<!ATTLIST FORMAT_REF_LIST %REM; >
<!-- FORMAT_REF holds the ID(s) for a format string for output display. -->
<!-- Example: -->
<!-- none -->
<!-- ==============================================================-->
<!-- FORMAT_REF: formatting reference for a KEY, pointing at a format that is
     defined elsewhere. Described in the original notes as carrying the
     format's NAME and any IDs. Its content is one or more repetitions of the
     REF parameter entity, which is declared outside this file. -->
<!ELEMENT FORMAT_REF (%REF;)+>
<!ATTLIST FORMAT_REF %REM; >
<!-- Example: -->
<!-- none -->
<!-- ==============================================================-->
<!-- UNIT_LIST: one or more UNIT children. It may appear at dataset level
     (inside TS_METADATA) and inside an individual VARIABLE. -->
<!ELEMENT UNIT_LIST (UNIT)+>
<!ATTLIST UNIT_LIST %REM; >
<!--UNIT contains the ID(s) and names for a unit of measurement. -->
<!-- Example: 
	<UNIT_LIST>
		<UNIT>
			<ID>MT</ID>
			<NAME_LIST>
				<NAME LANG="en">Metric ton</NAME>
			</NAME_LIST>
		</UNIT>
		<UNIT>
			<ID>$1000</ID>
			<NAME_LIST>
				<NAME LANG="en">Thousand US dollars</NAME>
			</NAME_LIST>
		</UNIT>
	</UNIT_LIST>
-->
<!-- ==============================================================-->
<!-- UNIT: one unit of measurement.
     Children, in order: an internal ID, zero or more FOREIGN_IDs, and an
     optional NAME_LIST with the unit NAMEs.
     Attributes: ABBREV is optional free text; DEFAULT is yes or no and
     defaults to "no". -->
<!ELEMENT UNIT (
	ID,
	FOREIGN_ID*,
	NAME_LIST?
)>
<!ATTLIST UNIT
	ABBREV	CDATA	#IMPLIED
	DEFAULT	(yes | no)	"no"
	%REM;
>
<!--ID contains an ID value for a measurement unit. -->
<!--FOREIGN_ID contains an ID value for a measurement unit external to the IMP/EXP program. -->
<!--NAME_LIST contains one or more unit NAMEs. -->
<!-- Example: 
			<UNIT>
			<ID>MT</ID>
			<NAME_LIST>
				<NAME LANG="en">Metric ton</NAME>
			</NAME_LIST>
		</UNIT>
	-->
<!-- ==============================================================-->
<!-- SYMBOL_LIST: one or more SYMBOL children, the flags that qualify the data.
     It may appear at dataset level (inside TS_METADATA) and inside an
     individual VARIABLE. -->
<!ELEMENT SYMBOL_LIST (SYMBOL)+>
<!ATTLIST SYMBOL_LIST %REM; >
<!--SYMBOL (data qualifying flag) contains a NAME_LIST of symbol names for a DATASET or a VARIABLE. -->
<!-- Example: 
	<SYMBOL_LIST>
		<SYMBOL CHAR=".">
			<NAME_LIST>
				<NAME LANG="en">Unknown</NAME>
			</NAME_LIST>
		</SYMBOL>
		<SYMBOL CHAR="0">
			<NAME_LIST>
				<NAME LANG="en">Negligible</NAME>
			</NAME_LIST>
		</SYMBOL>
		<SYMBOL CHAR="-">
			<NAME_LIST>
				<NAME LANG="en">Zero</NAME>
			</NAME_LIST>
		</SYMBOL>
		<SYMBOL CHAR="F">
			<NAME_LIST>
				<NAME LANG="en">FAO Estimate</NAME>
			</NAME_LIST>
		</SYMBOL>
		<SYMBOL CHAR="R">
			<NAME_LIST>
				<NAME LANG="en">Repetition</NAME>
			</NAME_LIST>
		</SYMBOL>
	</SYMBOL_LIST>
-->
<!-- ==============================================================-->
<!-- SYMBOL: one flag that qualifies data, for a DATASET or a VARIABLE.
     Content: exactly one NAME_LIST with the symbol names.
     Attributes: CHAR (required) is the symbol string itself; DEFAULT is
     yes or no, default "no"; ZERO is yes, no or both, default "no". -->
<!ELEMENT SYMBOL (NAME_LIST)>
<!ATTLIST SYMBOL
	CHAR	CDATA	#REQUIRED
	DEFAULT	(yes | no)	"no"
	ZERO	(yes | no | both)	"no"
	%REM;
>
<!--NAME_LIST contains the NAME(s) for the SYMBOL. -->
<!--CHAR holds the actual string value of the SYMBOL (usually 1 character).-->
<!--DEFAULT indicates whether or not it is the default flag for the value.-->
<!--ZERO indicates whether this symbol is used with zeroes, with non-zeroes
or both -->
<!-- Example: 
		<SYMBOL CHAR="F">
			<NAME_LIST>
				<NAME LANG="en">FAO Estimate</NAME>
			</NAME_LIST>
		</SYMBOL>
		-->
<!-- ==============================================================-->
<!-- TIME_AXIS: describes the time axis of a dataset.
     A regular axis is a list of time periods at regular intervals (years,
     quarters, months). It is marked by a REGULAR child and can be described by
     the attributes alone, so TIME_PERIOD_LIST is optional after REGULAR.
     An irregular axis has periods that are not necessarily evenly spaced.
     It has no REGULAR child and must list its periods in a TIME_PERIOD_LIST.
     Attributes: TIME_UNIT is year, quarter, month or other, default "year";
     COLUMNS_PER_TIME_UNIT defaults to "1"; START_DATE and NUM_COLUMNS are
     required. -->
<!ELEMENT TIME_AXIS (
	(REGULAR, TIME_PERIOD_LIST?)
	| TIME_PERIOD_LIST
)>
<!ATTLIST TIME_AXIS
	TIME_UNIT	(year | quarter | month | other)	"year"
	COLUMNS_PER_TIME_UNIT	CDATA	"1"
	START_DATE	CDATA	#REQUIRED
	NUM_COLUMNS	CDATA	#REQUIRED
	%REM;
>
<!-- Regular time axis is identified by empty element REGULAR. If it is present,
TIME_PERIOD_LIST is optional, otherwise it is mandatory.

A regular TIME_AXIS has exactly COLUMNS_PER_TIME_UNIT columns for each
TIME_UNIT starting at START_DATE, for total of NUM_COLUMNS. -->

<!-- TIME_PERIOD_LIST holds the TIME_PERIODs for irregular axes.
It may be used for regular axes as well, if you need to change
names or IDs for time periods
-->
<!--TIME_UNIT and COLUMNS_PER_TIME_UNIT together define dataset resolution.
For example, 
TIME_UNIT="year" COLUMNS_PER_TIME_UNIT="1" is an annual dataset.
TIME_UNIT="year" COLUMNS_PER_TIME_UNIT="5" is a dataset with 5 columns per year,
e.g. 4 quarters and 'unknown quarter'. Etc.

Each TIME_PERIOD has a sequence number (1...), an ID and a screen name (see TIME_PERIOD definition).
The ID, name or sequence number can be used to refer to the columns from TS_DATA (although it may be more convenient to use column numbers instead).

If TIME_PERIOD_LIST is missing for a regular axis, these IDs and names are generated automatically. 
Format is 
year[.time_unit][.column_within_unit]

year is a 4-digit year number
time_unit is a number of TIME_UNIT within the year (e.g. 1..4 for quarters etc)
column_within_unit is a number of column within time unit.

So, if we define our dataset as 
TIME_UNIT="year" COLUMNS_PER_TIME_UNIT="1" START_DATE="1990",
columns are 1990,1991,1992...
For
TIME_UNIT="year" COLUMNS_PER_TIME_UNIT="5" START_DATE="1990",
they are 1990.1, 1990.2,...1990.5, 1991.1 etc.
and for TIME_UNIT="month" COLUMNS_PER_TIME_UNIT="2" START_DATE="1995.1" 
(twice a month data)
they are 1995.1.1, 1995.1.2,1995.2.1, 1995.2.2...

The user can override column IDs and/or names for a regular time axis, and MUST do so
for an irregular one.-->
<!-- ==============================================================-->
<!-- REGULAR: empty marker element with no content of its own. -->
<!ELEMENT REGULAR EMPTY>
<!ATTLIST REGULAR %REM; >
<!-- Its presence within TIME_AXIS indicates that it is a regular time axis.-->
<!-- ==============================================================-->
<!-- TIME_PERIOD_LIST: one or more TIME_PERIOD children, the slices of time for
     which a VARIABLE contains data. -->
<!ELEMENT TIME_PERIOD_LIST (TIME_PERIOD)+>
<!ATTLIST TIME_PERIOD_LIST %REM; >
<!-- ==============================================================-->
<!-- TIME_PERIOD: one time period (slice) for which a VARIABLE contains data.
     Content: an optional NAME_LIST.
     Attributes, all optional: SEQ_NUMBER, ID and NUMERIC_VALUE. The numeric
     position of the period is carried by NUMERIC_VALUE; there is no attribute
     named VALUE. -->
<!ELEMENT TIME_PERIOD (NAME_LIST?)>
<!ATTLIST TIME_PERIOD
	SEQ_NUMBER	CDATA	#IMPLIED
	ID	CDATA	#IMPLIED
	NUMERIC_VALUE	CDATA	#IMPLIED
	%REM;
>
<!--SEQ_NUMBER is sequence number of TIME_PERIOD (starting from 1)
if omitted its position within TIME_PERIOD_LIST is implied
ID is unique identifier. TS_DATA may refer to ID or to sequence number.
If omitted, it is automatically assigned.
NAME_LIST are screen names in various languages. If omitted, same as ID
NUMERIC_VALUE is a number (not necessarily integer!) 
showing place on X axis. This may be needed for highly irregular datasets
(e.g. to display graph). If omitted it is
automatically generated from START_DATE and SEQ_NUMBER (or ignored) -->
<!-- Example: 
1) annual dataset from 1970 to 1999 (Capture)
<TIME_AXIS START_DATE="1970" NUM_COLUMNS="30">
  	<REGULAR/>
</TIME_AXIS>

2) quarterly dataset 	(with fake 5th quarter) from 1950 to 2000 (Tuna Atlas)
<TIME_AXIS START_DATE="1950" NUM_COLUMNS="255" COLUMNS_PER_TIME_UNIT="5">
  	<REGULAR/>
</TIME_AXIS>

3) quarterly dataset 	(with normal 4 quarters) from 1990 to 2000 
<TIME_AXIS START_DATE="1990" NUM_COLUMNS="44" TIME_UNIT="quarter">
  	<REGULAR/>
</TIME_AXIS>

4) Irregular dataset: Fleets from 1970..1995 with some years missing:
<TIME_AXIS START_DATE="1970" NUM_COLUMNS="21" TIME_UNIT="year">
	<TIME_PERIOD_LIST>
		<TIME_PERIOD ID="1970"/>
		<TIME_PERIOD ID="1975"/>
		<TIME_PERIOD ID="1977"/>
		<TIME_PERIOD ID="1978"/>
		<TIME_PERIOD ID="1979"/>
		<TIME_PERIOD ID="1980"/>
		<TIME_PERIOD ID="1981"/>
		<TIME_PERIOD ID="1982"/>
		<TIME_PERIOD ID="1983"/>
		<TIME_PERIOD ID="1984"/>
		etc
	</TIME_PERIOD_LIST>
</TIME_AXIS>
	
-->
<!-- ==============================================================-->
<!-- VARIABLE_LIST: one or more VARIABLE children found in a DATASET.
     NUM_VARIABLES (required) is meant to carry the number of VARIABLE
     children. It is plain CDATA, so the DTD does not check it against the
     actual count. -->
<!ELEMENT VARIABLE_LIST (VARIABLE)+>
<!ATTLIST VARIABLE_LIST
	NUM_VARIABLES	CDATA	#REQUIRED
	%REM;
>
<!--TIME_AXIS is not a child of VARIABLE_LIST: the dataset-level TIME_AXIS is declared in TS_METADATA, and an individual VARIABLE may carry its own. -->
<!--VARIABLE contains timeseries metadata-->
<!--NUM_VARIABLES holds the number of instances of VARIABLE contained in the VARIABLE_LIST. -->
<!-- Example: -->
<!-- none -->
<!-- ==============================================================-->
<!-- VARIABLE: a single variable within a dataset, for example "Quantity",
     "Value" or "Tonnage". Synonyms could be 'parameter', 'dimension' or even
     'data concept'.
     Besides its identifying information (ID, NAME_LIST, ABBREV) it may carry
     its own UNIT_LIST, SYMBOL_LIST and even TIME_AXIS, overriding the
     dataset 'global' metadata where a variable differs from the others.
     Children, in order: ID, then optional NAME_LIST, UNIT_LIST, SYMBOL_LIST
     and TIME_AXIS.
     Attributes: ABBREV is required; PRECISION is one of 0 to 5, default "0";
     MULTIPLIER defaults to "1". -->
<!ELEMENT VARIABLE (
	ID,
	NAME_LIST?,
	UNIT_LIST?,
	SYMBOL_LIST?,
	TIME_AXIS?
)>
<!ATTLIST VARIABLE
	ABBREV	CDATA	#REQUIRED
	PRECISION	(0 | 1 | 2 | 3 | 4 | 5)	"0"
	MULTIPLIER	CDATA	"1"
	%REM;
>
<!--ID is the VARIABLE id.-->
<!--NAME_LIST contains one or more variable NAMEs. -->
<!--UNIT_LIST contains a list of UNITs for the VARIABLE. -->
<!--SYMBOL_LIST contains a set of SYMBOLs for the VARIABLE. These symbols are flags that qualify the data. -->
<!--TIME_AXIS holds the TIME_PERIODs covered by a VARIABLE. -->
<!--ABBREV is the variable short name. -->
<!--PRECISION is '1' for highest, '5' for lowest and '0' for unspecified.-->
<!--MULTIPLIER is the multiplier, if any, applied to the timeseries data.-->
<!-- Example: 
<VARIABLE ABBREV="V">
	<ID>VALUE</ID>
	<NAME_LIST>
		<NAME xml:lang="en">Value</NAME>
		<NAME xml:lang="fr">Valeur</NAME>
	</NAME_LIST>
	<UNIT_LIST>
		<UNIT ABBREV="$1000">
			<ID>$1000</ID>
			<NAME_LIST>
				<NAME xml:lang="en">Thousand US Dollars</NAME>
			</NAME_LIST>
		</UNIT>
	</UNIT_LIST>
	symbol list and time axis are inherited from the dataset-level TS_METADATA
</VARIABLE>			
-->
<!-- ==============================================================-->
<!--This is the end of the TimeSeriesMetadata DTD. -->
<!-- Prepared by Robin Cover for The XML Cover Pages archive. -->