{
    "$id": "https://linguistics.rub.de/~roussel/tabular-json/schema",
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "title": "Tabular JSON Document Schema, Version 1.2.0",
    "type": "object",
    "required": ["id", "metadata", "token"],
    "properties": {
        "id": {
            "type": "string",
            "description": "A unique identifier for the document."
        },
        "metadata": {
            "type": "object",
            "description": "Metadata pertaining to a document's origin, contents, etc.",
            "properties": {
                "annotations": {
                    "type": "object",
                    "patternProperties": {
                        ".*": {
                            "anyOf": [
                                {
                                    "type": "object",
                                    "properties": {
                                        "type": {
                                            "enum": ["property", "token", "object", "relation", "set", "span", "spanset", "hierset"],
                                            "description": "The type of annotation specified by this property."
                                        },
                                        "description": {"type": "string"}
                                    },
                                    "required": ["type"]
                                },
                                {
                                    "type": "object",
                                    "properties": {
                                        "use": {
                                            "type": "string",
                                            "description": "The name of the annotation to be used for this property."
                                        },
                                        "description": {"type": "string"}
                                    },
                                    "required": ["use"],
                                    "additionalProperties": false
                                }
                            ]
                        }   
                    }
                }
            },
            "required": ["annotations"]
        },
        "token": {
            "type": "array",
            "description": "The main sequence of tokens that is annotated in a document.",
            "items": {
                "type": "object",
                "required": ["id", "form"],
                "properties": {
                    "id": {
                        "type": "string",
                        "description": "Unique identifier for this token object."
                    },
                    "form": {
                        "type": "string",
                        "description": "Default string representation of this token."
                    },
                    "join": {
                        "description": "Used when a space was not present on either side of this token.",
                        "enum": ["left", "both", "right"]
                    },
                    "pos": {
                        "type": "string",
                        "description": "Part-of-speech tag."
                    },
                    "lemma": {
                        "type": "string",
                        "description": "Lemma."
                    },
                    "virttok": {
                        "type": "string",
                        "description": "Used to associate alternative tokenizations with the main token array."
                    }
                }
            }
        },
        "sentence": {
            "type": "array",
            "description": "Sentence boundaries.",
            "items": {"$ref": "#/$defs/span"}
        },
        "dependency": {
            "type": "array",
            "description": "Syntactic dependencies.",
            "items": {"$ref": "#/$defs/relation"}
        },
        "constituency": {
            "type": "array",
            "description": "Constituency parses.",
            "items": {"$ref": "#/$defs/hierset"}
        },
        "coreference": {
            "type": "array",
            "description": "Coreference.",
            "items": {"$ref": "#/$defs/spanset"}
        }
    },
    "$defs": {
        "token": {
            "type": "object",
            "required": ["id", "form"],
            "properties": {
                "id": {
                    "type": "string",
                    "description": "Unique identifier for this token object."
                },
                "form": {
                    "type": "string",
                    "description": "Default string representation of this token."
                },
                "virttok": {
                    "type": "string",
                    "description": "Used to associate alternative tokenizations with the main token array."
                }
            }
        },
        "span": {
            "type": "object",
            "required": ["id", "begin", "end"],
            "properties": {
                "id": {
                    "type": "string",
                    "description": "Element identifier.  Unique in document."
                },
                "name": {
                    "type": "string",
                    "description": "Name of the layout element."
                },
                "begin": {
                    "type": "integer",
                    "description": "The token at which this element begins.",
                    "minimum": 1,
                    "$comment": "One-based indexing."
                },
                "end": {
                    "type": "integer",
                    "minimum": 1,
                    "description": "The token at which this element ends."
                }
            }
        },
        "relation": {
            "type": "object",
            "required": ["from", "to"],
            "properties": {
                "id": {
                    "type": "string",
                    "description": "Element identifier.  Unique in document."
                },
                "from": {
                    "type": "integer",
                    "description": "The token at which this relation originates.",
                    "minimum": 1,
                    "$comment": "One-based indexing."
                },
                "to": {
                    "type": "integer",
                    "minimum": 1,
                    "description": "The token to which this relation applies."
                },
                "label": {
                    "type": "string",
                    "description": "Label for this relation."
                }
            }
        },
        "set": {
            "type": "object",
            "required": ["token", "set"],
            "properties": {
                "token": {
                    "type": "integer",
                    "description": "Index of token this annotation pertains to.",
                    "minimum": 1
                },
                "set": {
                    "type": "string",
                    "description": "Shared ID for an annotated set."
                },
                "label": {
                    "type": "string",
                    "description": "Label for this element of the set."
                },
                "substring": {
                    "type": "string",
                    "description": "Iff only a substring of the token is marked, this is that substring."
                }
            }
        },
        "spanset": {
            "type": "object",
            "required": ["set", "begin", "end"],
            "properties": {
                "set": {
                    "type": "string",
                    "description": "Shared ID for an annotated set."
                },
                "label": {
                    "type": "string",
                    "description": "Label for this element of the set."
                },
                "begin": {
                    "type": "integer",
                    "description": "The token at which this element begins.",
                    "minimum": 1,
                    "$comment": "One-based indexing."
                },
                "end": {
                    "type": "integer",
                    "minimum": 1,
                    "description": "The token at which this element ends."
                }
            }
        },
        "hierset": {
            "type": "object",
            "required": ["id", "begin", "end", "parent"],
            "properties": {
                "id": {
                    "type": "string",
                    "description": "Element identifier.  Unique in document."
                },
                "label": {
                    "type": "string",
                    "description": "Label for this element of the set."
                },
                "parent": {
                    "type": "string",
                    "description": "ID of the element above this one."
                },
                "begin": {
                    "type": "integer",
                    "description": "The token at which this element begins.",
                    "minimum": 1,
                    "$comment": "One-based indexing."
                },
                "end": {
                    "type": "integer",
                    "minimum": 1,
                    "description": "The token at which this element ends."
                }

            }
        }
    }
}
