<!-- From: http://www.bioxml.org/Projects/game/game.dtd.html -->
<!-- game: declared with the ANY content model, so it may hold any mix of
     declared elements and character data. Presumably the document root;
     confirm against instance documents. -->
<!ELEMENT game ANY>
<!-- ************************************************************* -->
<!-- GAME Genome Annotation Markup Elements -->
<!-- Document Type Definition DTD - June 5, 1999 - Version 1.0 -->
<!--
Suzanna E. Lewis
-->
<!--
Erwin Frise
-->
<!-- University of California Berkeley -->
<!-- $Id: game.dtd.html,v 1.1 2000/03/07 19:54:51 bradmars Exp $ -->
<!-- Annotations are a summarization of all the collected features
discerned
and described on related sequences of genomic DNA, transcripts, mRNAs
(and
cDNAs which are treated as their logical equivalent), and proteins.
Each of
these molecules has regions along their linear length described by
annotators as 'features'. The features themselves are a combined summary
of
both computational and genetic analysis of that DNA, RNA, or AA sequence.
Computational analyses are not considered 'features' and are treated
as
primary data, as are any experimental analyses carried out at the bench.
In
other words, analytical results may be used to identify features, but
are
not considered features on their own in this context. Thus, each molecule
is
described both in terms of primary analytical results and in terms
of expert
defined features that are supported by the preceding results. The
combination of all these associated feature descriptions on the related
molecules (from genomic to protein) constitute a statement that is
called an
annotation.
-->
<!-- '' == ONE, NO MORE NO LESS -->
<!-- '?' == ZERO OR ONE. -->
<!-- '*' == ZERO OR MORE. -->
<!-- '+' == ONE OR MORE. -->
<!-- General purpose entities and elements that are used in
multiple elements -->
<!-- integer: attribute type used for integer-valued attributes. NMTOKEN is
     only an approximation: a validator accepts any name token here, not
     just digits. -->
<!ENTITY % integer "NMTOKEN">

<!-- Text-only (#PCDATA) elements. -->
<!ELEMENT type          (#PCDATA)>
<!ELEMENT value         (#PCDATA)>
<!ELEMENT name          (#PCDATA)>
<!ELEMENT synonym       (#PCDATA)>
<!ELEMENT term          (#PCDATA)>
<!ELEMENT program       (#PCDATA)>
<!ELEMENT version       (#PCDATA)>
<!ELEMENT score         (#PCDATA)>

<!-- Dates, written in ISO date format. -->
<!ELEMENT creation_date (#PCDATA)>
<!ELEMENT date          (#PCDATA)>

<!-- Comments and other free text. -->
<!ELEMENT description   (#PCDATA)>

<!-- Residues: DNA, RNA, AA. -->
<!ELEMENT residues      (#PCDATA)>
<!ELEMENT alignment     (#PCDATA)>

<!-- Type/value pairs: exactly one type followed by exactly one value. -->
<!ELEMENT parameter     (type, value)>
<!ELEMENT output        (type, value)>
<!ELEMENT parent        (type, value)>
<!-- Positional values. Their content must be an integer, although the
     #PCDATA content model means a validator cannot enforce that. -->
<!ELEMENT offset (#PCDATA)>
<!ELEMENT length (#PCDATA)>
<!ELEMENT from   (#PCDATA)>
<!ELEMENT to     (#PCDATA)>
<!ELEMENT start  (#PCDATA)>
<!ELEMENT end    (#PCDATA)>
<!-- Text-only elements describing where the sequence came from. -->
<!ELEMENT species (#PCDATA)>
<!ELEMENT tissue  (#PCDATA)>
<!ELEMENT stage   (#PCDATA)>
<!ELEMENT project (#PCDATA)>
<!-- The entity 'site_operator' is specific to fuzzy site, start,
and end elements. Its replacement text is a complete attribute
definition: the attribute name site_operator followed by its two
allowed values, less_than and greater_than. The ATTLIST declarations
that follow pull it in by reference and supply the default. -->
<!ENTITY % site_operator " site_operator (less_than | greater_than)">
<!-- fuzzy_start: the imprecise beginning of a fuzzy_span, given as exactly
     one span.
     site_operator (optional): less_than or greater_than, as defined by the
     site_operator entity.
     The attribute list is attached to fuzzy_start itself. It used to name
     the plain start element, which left site_operator undeclared on
     fuzzy_start even though the worked example in this file writes
     fuzzy_start with site_operator="less_than". -->
<!ELEMENT fuzzy_start (span)>
<!ATTLIST fuzzy_start
  %site_operator; #IMPLIED
>
<!-- fuzzy_end: the imprecise end of a fuzzy_span, given as exactly one span.
     site_operator (optional): less_than or greater_than, as defined by the
     site_operator entity.
     The attribute list is attached to fuzzy_end itself. It used to name the
     plain end element, which left site_operator undeclared on fuzzy_end. -->
<!ELEMENT fuzzy_end (span)>
<!ATTLIST fuzzy_end
  %site_operator; #IMPLIED
>
<!-- fuzzy_span: exactly one fuzzy_start followed by exactly one fuzzy_end,
     each of which wraps a span. -->
<!ELEMENT fuzzy_span (fuzzy_start, fuzzy_end)>
<!-- either_dir attribute is because some features do not have
an orientation associated with them, but apply
equally well
to either strand -->
<!-- The between attribute is used to indicate a position between
2
bases (or more generally between 2 sites?).
It is important to note that this flag is
preferred to a
length of zero. The problem with a 0 length
span is that it
is not possible to tell if one means before
or after the
current base -->
<!-- span: exactly one start followed by exactly one end.
     between (optional): the only allowed value is TRUE.
     either_dir (optional): the only allowed value is TRUE.
     NOTE(review): the worked examples in this file fill span with
     offset/length children, which this content model does not admit.
     Confirm which form is intended. -->
<!ELEMENT span (start, end)>
<!ATTLIST span
  between    (TRUE) #IMPLIED
  either_dir (TRUE) #IMPLIED
>
<!-- SOME EXAMPLES
I've taken the locations from the descriptions in the
GB Feature Table
Definition. For descriptions of what the feature
spans mean, see:
http://www.ncbi.nlm.nih.gov/collab/FT/components.html#location_descriptors
(David Emmert, Harvard)
- Location: 467
<span>
<offset>466</offset>
<length>1</length>
</span>
- Location: 340..565
<span>
<offset>339</offset>
<length>225</length>
</span>
- Location: <345..500
<fuzzy_span>
<fuzzy_start
site_operator="less_than">
<span>
<offset>344</offset>
<length>1</length>
</span>
</fuzzy_start>
<fuzzy_end>
<span>
<offset>499</offset>
<length>1</length>
</span>
</fuzzy_end>
</fuzzy_span>
- Location: (102.110)
<span>
<offset>101</offset>
<length>8</length>
</span>
- Location: (23.45)..600
<fuzzy_span>
<fuzzy_start>
<span>
<offset>22</offset>
<length>22</length>
</span>
</fuzzy_start>
<fuzzy_end>
<span>
<offset>599</offset>
<length>1</length>
</span>
</fuzzy_end>
</fuzzy_span>
- Location: (122.133)..(204.221)
<fuzzy_span>
<fuzzy_start>
<span>
<offset>121</offset>
<length>11</length>
</span>
</fuzzy_start>
<fuzzy_end>
<span>
<offset>203</offset>
<length>17</length>
</span>
</fuzzy_end>
</fuzzy_span>
- Location: 123^124
<span between="TRUE">
<offset>122</offset>
<length>1</length>
</span>
-->
<!-- Annotation sub-elements. -->
<!-- NAME: the official (by someone's standard) symbol to use -->
<!-- DBXREF: The database cross-reference element refers to a
database where the annotation is generated and maintained. -->
<!-- GENE: Can't seem to avoid the nefarious gene concept. There are
different relationships a gene can have to the annotation element.
One is a positive identification (or assignment) to a gene. The alternative
to this is a list of known genes (from traditional genetic analysis), any of
which are possible candidates for assigning to this annotation. Both of
these assignment elements are naturally supposed to be within the same
species. Relationships to other genes (either within or in other species)
are indicated by enclosing zero or more related gene elements. The specifics
of the type of relationship are held within the sub-element. -->
<!-- DESCRIPTION: is a comment, a free text field for the curators
to jot down
any additional information. -->
<!-- FEATURE_SET: to make it possible to set this up in an analogous
manner
to computational_analysis and result_set -->
<!-- SEQ: what sequence this annotation applies to -->
<!-- Annotation attributes. -->
<!-- The id is a unique identifier for other elements to use
when referencing this annotation. -->
<!-- annotation: children in this order: optional name, optional dbxref, any
     number of gene, any number of aspect, optional description, any number
     of feature_set, optional seq.
     id (required): unique ID other elements use to reference this annotation.
     seq (optional): IDREF. -->
<!ELEMENT annotation (
  name?, dbxref?, gene*, aspect*, description?, feature_set*, seq?
)>
<!ATTLIST annotation
  id  ID    #REQUIRED
  seq IDREF #IMPLIED
>
<!-- aspect: one dbxref followed by exactly one of function, process or
     cellular_component, each of which is text-only. -->
<!ELEMENT aspect (
  dbxref,
  (function | process | cellular_component)
)>
<!ELEMENT function           (#PCDATA)>
<!ELEMENT process            (#PCDATA)>
<!ELEMENT cellular_component (#PCDATA)>
<!-- Obviously there are other sorts of gene to gene relationships
and these
still need to be added -->
<!-- association: attribute definition for the gene ATTLIST; the value must
     be one of HOMOLOG, ORTHOLOG, PARALOG, IS or MAY_BE. -->
<!ENTITY % association "association (HOMOLOG|ORTHOLOG|PARALOG|IS|MAY_BE)">
<!-- gene: one dbxref, then optional name, synonym, species and description,
     in that order.
     association (required): see the association entity.
     annotation (optional): IDREF. -->
<!ELEMENT gene (
  dbxref, name?, synonym?, species?, description?
)>
<!ATTLIST gene
  %association;    #REQUIRED
  annotation IDREF #IMPLIED
>
<!-- seq sub-elements. -->
<!-- The seq element represents the different DNA, RNA, and AA molecules.
-->
<!-- The database cross-reference refers to a sequence database like
genbank
or embl (only for genomic and cDNAs of course). -->
<!-- A single origin/source is requested to indicate the derivation
of the
primary sequence (this is basically clone information for genomic and
cDNA
data). -->
<!-- The residues are always optional for any of these. -->
<!-- seq element IDs are used to support derivation between seq
elements -->
<!-- Molecular element attributes. -->
<!-- Each has a unique identifier for other elements to use when
referencing
this sequence molecule. It may also act as a label in displays.
Because the DNA, RNA, or AA residue elements are optional a length
attribute
is required. The length provides the extent of the number line along
which
the features and analysis are positioned -->
<!-- These aspects are associated with an individual sequence
and not the annotation because a single annotation may
describe the different gene products that arise from
the same region of the genome -->
<!-- Enumerated attribute definitions (attribute name plus its allowed
     values). The seq ATTLIST pulls each of them in by parameter-entity
     reference. -->
<!ENTITY % maturity
  "maturity (primary | processed | pro | pre-pro | pre-pro-pro)">
<!ENTITY % transcript_function
  "transcript_function (mRNA | rRNA | snoRNA | snRNA | tRNA | trans_spliced_leader)">
<!ENTITY % immigrant
  "immigrant (transposon | pseudogene | mobile_intron | virus | plasmid)">
<!-- do we really want CDS? it seems redundant --
right-o its gone, also chucked cDNA -->
<!-- seq_type: definition of the "type" attribute of seq; one of AA, RNA
     or DNA. -->
<!ENTITY % seq_type "type (AA | RNA | DNA)">
<!-- seq: children in this order: optional name, any number of dbxref, any
     number of map_position, optional source, optional project, any number
     of clone, optional description, optional residues, any number of parent.
     id and type are required; every other attribute is optional.
     NOTE(review): the commentary in this file says a length attribute is
     required because residues are optional, yet length is declared
     #IMPLIED. Confirm the intent before tightening it. -->
<!ELEMENT seq (
  name?, dbxref*, map_position*, source?, project?, clone*,
  description?, residues?, parent*
)>
<!ATTLIST seq
  id          ID        #REQUIRED
  %seq_type;            #REQUIRED
  produced_by IDREF     #IMPLIED
  length      %integer; #IMPLIED
  %maturity;            #IMPLIED
  %transcript_function; #IMPLIED
  %immigrant;           #IMPLIED
>
<!-- seq_relationship: either a span or a fuzzy_span, optionally followed by
     an alignment.
     seq (optional): IDREF.
     type (optional): one of query, subject, peer or subseq. -->
<!ELEMENT seq_relationship (
  (span | fuzzy_span),
  alignment?
)>
<!ATTLIST seq_relationship
  seq  IDREF                            #IMPLIED
  type (query | subject | peer | subseq) #IMPLIED
>
<!-- map_type: definition of the "type" attribute of map_position; one of
     cytological, linear or ordering. -->
<!ENTITY % map_type "type (cytological | linear | ordering)">
<!-- map: character data, for example 2R in the mapping example in this file.
     Declared here because map_position requires it as a child; without a
     declaration no map_position instance could be valid. -->
<!ELEMENT map (#PCDATA)>
<!-- map_position: exactly one map, optionally followed by a span.
     type (required): see the map_type entity.
     seq (optional): IDREF. -->
<!ELEMENT map_position (map, span?)>
<!ATTLIST map_position
  %map_type; #REQUIRED
  seq IDREF  #IMPLIED
>
<!-- an example of a mapping
<map_position type="cytological">
<map>2R</map>
<span>
<start>35A</start>
<end>35B</end>
</span>
</map_position>
-->
<!-- ordering of the 'exons' is implied by the ordering of the
features in this set -->
<!-- VERSION: as the annotation progresses versions are maintained
-->
<!-- AUTHOR: who/what created this annotation -->
<!-- DATE: date this annotation was first created -->
<!-- feature_set: children in this order: optional name, optional type, any
     number of seq_relationship, optional author, optional creation_date,
     optional version, any number of evidence, any number of parent,
     optional description, any number of feature_span, optional seq.
     id (required): ID.
     annotation (optional): IDREF.
     produces_seq (optional): IDREF. -->
<!ELEMENT feature_set (
  name?, type?, seq_relationship*, author?, creation_date?, version?,
  evidence*, parent*, description?, feature_span*, seq?
)>
<!ATTLIST feature_set
  id           ID    #REQUIRED
  annotation   IDREF #IMPLIED
  produces_seq IDREF #IMPLIED
>
<!-- A 'feature' is defined by 3 things: a type,
an interval (start and end) to place it on
the molecule in question,
and the results that support this designation.
-->
<!-- feature_span: optional type, any number of seq_relationship, any number
     of evidence, then optional tag_residues. -->
<!ELEMENT feature_span (
  type?, seq_relationship*, evidence*, tag_residues?
)>
<!-- computational_analysis: children in this order: optional type, optional
     database, required program, optional date, optional version, any number
     of parameter, any number of result_set.
     id (optional): ID. Added so that IDREF attributes can point at this
     element; the commentary on evidence in this file says its result
     attribute may refer to a computational_analysis, which is impossible
     without an ID attribute here. Being optional, it does not invalidate
     existing documents.
     seq (optional): IDREF. -->
<!ELEMENT computational_analysis (
  type?, database?, program, date?, version?, parameter*, result_set*
)>
<!ATTLIST computational_analysis
  id  ID    #IMPLIED
  seq IDREF #IMPLIED
>
<!-- result_set: children in this order: optional score, any number of
     seq_relationship, optional dbxref, any number of output, any number of
     result_span, any number of parent. -->
<!ELEMENT result_set (
  score?, seq_relationship*, dbxref?, output*, result_span*, parent*
)>
<!-- result_span: optional score, optional type, one or more
     seq_relationship, then any number of output.
     id (optional): ID. -->
<!ELEMENT result_span (
  score?, type?, seq_relationship+, output*
)>
<!ATTLIST result_span
  id ID #IMPLIED
>
<!-- tag_residues: exactly one residues child.
     offset (required): typed through the integer entity. -->
<!ELEMENT tag_residues (residues)>
<!ATTLIST tag_residues
  offset %integer; #REQUIRED
>
<!-- evidence: optional dbxref followed by optional description.
     type (optional): free text (CDATA).
     result (optional): IDREF. -->
<!ELEMENT evidence (dbxref?, description?)>
<!ATTLIST evidence
  type   CDATA #IMPLIED
  result IDREF #IMPLIED
>
<!-- IMPORTANT
The element that the evidence result attribute refers to may be
a seq, a computational_analysis, or a result_span, depending upon
whether or not the computed results are actually available within
the XML document (a result_span). If they are not directly provided,
this provides a mechanism to indicate how those results can be
regenerated. A seq element id means that there is an alignment.
A computational_analysis indicates what program to run and how.
A result_span element means that the program has already been
run and the results are available within the current document.
-->
<!-- experimental_analysis: one or more experimental_conclusion, then exactly
     one citation, then an optional description.
     seq (required): IDREF. -->
<!ELEMENT experimental_analysis (
  experimental_conclusion+, citation, description?
)>
<!ATTLIST experimental_analysis
  seq IDREF #REQUIRED
>
<!-- experimental_conclusion: text-only.
     id (optional): ID. -->
<!ELEMENT experimental_conclusion (#PCDATA)>
<!ATTLIST experimental_conclusion
  id ID #IMPLIED
>
<!-- database: one name, then optional date and optional version. -->
<!ELEMENT database   (name, date?, version?)>
<!-- dbxref: one xref_db followed by an optional xref_db_id; both are
     text-only. -->
<!ELEMENT dbxref     (xref_db, xref_db_id?)>
<!ELEMENT xref_db    (#PCDATA)>
<!ELEMENT xref_db_id (#PCDATA)>
<!-- Everything below this point is very sketchy, so don't
jump to any conclusions from what follows
-->
<!-- Species is mandatory, it is the origin of the sequence -->
<!-- project is who generated this sequence (but not necessarily
the associated features and analyses). -->
<!-- source: optional species, optional tissue and optional stage, in that
     order.
     NOTE(review): the commentary in this file calls species mandatory, but
     this content model makes it optional (species?). Confirm the intent. -->
<!ELEMENT source (species?, tissue?, stage?)>
<!-- clone: one or more dbxref children (a database cross reference for the
     clone itself), then an optional span and an optional description.
     Open question from the original authors: text content for, say,
     ordering information? -->
<!ELEMENT clone (dbxref+, span?, description?)>
<!-- Open question from the original authors: use the Dublin Core here? -->
<!-- pub_type: definition of the "type" attribute of citation; one of
     Journal, Personal_communication, Proceedings or Book. -->
<!ENTITY % pub_type
  "type (Journal | Personal_communication | Proceedings | Book)">
<!-- citation: children in this order: optional dbxref, required title,
     optional journal, required date, any number of author, optional volume,
     optional pages.
     type (required): see the pub_type entity. -->
<!ELEMENT citation (
  dbxref?, title, journal?, date, author*, volume?, pages?
)>
<!ATTLIST citation
  %pub_type; #REQUIRED
>
<!-- Text-only children of citation (author is also a child of
     feature_set). -->
<!ELEMENT title   (#PCDATA)>
<!ELEMENT author  (#PCDATA)>
<!ELEMENT volume  (#PCDATA)>
<!ELEMENT pages   (#PCDATA)>
<!ELEMENT journal (#PCDATA)>