/**
 * Builds a rule that matches `rule` wrapped in a matching pair of quotes.
 *
 * @param rule - grammar rule placed between the quote characters
 * @returns a choice between the single-quoted and the double-quoted sequence
 */
const in_quotes = (rule) => {
  const singleQuoted = seq("'", rule, "'");
  const doubleQuoted = seq('"', rule, '"');
  return choice(singleQuoted, doubleQuoted);
};

// A run of one or more whitespace characters: space, tab, carriage return
// or line feed.
const S = /[ \t\r\n]+/;

module.exports = grammar({
  name: "dtd",

  // No extras are declared, so nothing is skipped implicitly: whitespace is
  // only accepted where a rule matches it explicitly.
  extras: ($) => [],

  // Rules listed here are inlined wherever they are used.
  inline: ($) => [$._eq],

  rules: {
    document: ($) =>
      repeat(choice($._markup_decl, $.pe_reference, S)),

    /** Names and Tokens **/

    // A single character allowed inside a name or name token: a word
    // character, ".", "-", ":" or one of the Unicode code points / ranges
    // listed in the character classes below.
    // NOTE(review): the Unicode tables look like the CombiningChar / Extender
    // sets of the XML specification — confirm before editing them.
    _name_char: ($) =>
      choice(
        /\w/,
        ".",
        "-",
        ":",
        /[\u0300-\u0345\u0360-\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD]/,
        /[\u05BF\u05C4\u0670\u093C\u094D\u09BC\u09BE\u09BF\u09D7\u0A02\u0A3C\u0A3E\u0A3F]/,
        /[\u05C1-\u05C2\u064B-\u0652\u06D6-\u06DC\u06DD-\u06DF\u06E0-\u06E4\u06E7-\u06E8]/,
        /[\u06EA-\u06ED\u0901-\u0903\u093E-\u094C\u0951-\u0954\u0962-\u0963\u0981-\u0983]/,
        /[\u09C0-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09E2-\u09E3\u0A40-\u0A42\u0A47-\u0A48]/,
        /[\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u3005]/,
        /[\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C\u309A\u00B7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6]/,
        /[\u0B3E-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B82-\u0B83\u0BBE-\u0BC2]/,
        /[\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31]/,
        /[\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C82-\u0C83]/,
        /[\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D43]/,
        /[\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19]/,
        /[\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97]/,
        /[\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9\u20D0-\u20DC\u20E1\u302A-\u302F\u3099]/
      ),

    _name: ($) => seq(choice(/\w/, "_", ":"), repeat($._name_char)),

    names: ($) => seq($._name, repeat(seq(/\s/, $._name))),

    // A name token: one or more name characters (unlike a name, there is no
    // separate rule for the first character).
    nm_token: ($) => repeat1($._name_char),

    nm_tokens: ($) => seq($.nm_token, repeat(seq(/\s/, $.nm_token))),

    /** Literals **/

    entity_value: ($) =>
      choice(('"', repeat(choice(/[^<&"]/, $.pe_reference, $.reference)), '"'),
        ("'", repeat(choice(/[^<&']/, $.pe_reference, $.reference)), "'")),

    attribute_value: ($) => choice(
      seq('"', repeat(choice(/[^<&"]/, $.reference)), '"'),
      seq("'", repeat(choice(/[^<&']/, $.reference)), "'")
    ),

    system_literal: ($) => choice(seq('"', /[^"]*/, '"'), seq("'", /[^']/, "'")),

    pubid_literal: ($) => choice(
      seq('"', repeat(choice($.pubid_char, "'")), '"'),
      seq("'", repeat($.pubid_char), "'")
    ),

    // One character of a public identifier: space, CR, LF, tab, an ASCII
    // letter or digit, or one of the punctuation characters
    // - ( ) + , . / : = ? ; ! * # @ $ _ %
    // The apostrophe is not part of this class; pubid_literal adds it for
    // the double-quoted form.
    pubid_char: ($) => /[ \r\n\ta-zA-Z0-9-()+,./:=?;!*#@$_%]/,

    /** Character Data **/

    // Character data: one or more characters other than "<" and "&".
    // FIXME: Should exclude ']]>'
    _char_data: ($) => /[^<&]+/,

    /** Comments **/

    comment: ($) => seq("<!--", repeat(seq(optional("-"), /[^-]/)), "-->"),

    /** Processing instructions **/

    processing_instructions: ($) =>
      seq("<?", $.pi_target, optional(seq(/\s/, /. - [.'?>'.]/)), "?>"),

    // Target of a processing instruction: any name.
    // TODO: this should eventually exclude names matching [Xx][Mm][Ll] too.
    pi_target: ($) => $._name,


    /** Prolog **/

    _eq: ($) => seq(optional(S), "=", optional(S)),

    /** Document Type Definition **/


    _markup_decl: ($) =>
      choice(
        $.element_decl,
        $.attlist_decl,
        $.entity_decl,
        $.notation_decl,
        $.processing_instructions,
        $.comment
      ),

    /** External Subset **/

    /** Standalone Document Declaration **/


    /** Language Identification **/

    /** Element **/


    /** Start-tag **/


    attribute: ($) =>
      seq(alias($._name, $.attribute_name), $._eq, $.attribute_value),

    /** End-tag **/

    end_tag: ($) =>
      seq(
        // TODO: Make sure closing tag matches opening tag?
        "</",
        alias($._name, $.tag_name),
        optional(/\s/),
        ">"
      ),

    /** Content of Elements **/


    /** Tags for Empty Elements **/


    /** Element Type Declaration **/

    element_decl: ($) =>
      seq(
        "<!",
        "ELEMENT",
        S,
        alias($._name, $.element_name),
        S,
        $.content_spec,
        optional(S),
        ">"
      ),

    content_spec: ($) => choice("EMPTY", "ANY", $.mixed, $.children),

    /** Element-content Models **/

    children: ($) =>
      seq(
        choice($.element_choice, $.element_seq),
        optional(choice("?", "*", "+"))
      ),

    cp: ($) =>
      seq(
        choice($._name, $.element_choice, $.element_seq),
        optional(choice("?", "*", "+"))
      ),

    element_choice: ($) =>
      seq(
        "(",
        optional(S),
        $.cp,
        repeat1(seq(optional(S), "|", optional(S), $.cp)),
        optional(S),
        ")"
      ),

    element_seq: ($) =>
      seq(
        "(",
        optional(S),
        $.cp,
        repeat(seq(optional(S), ",", optional(S), $.cp)),
        optional(S),
        ")"
      ),

    /** Mixed-content Declaration **/

    mixed: ($) =>
      prec.right(
        choice(
          seq(
            "(",
            optional(/\s/),
            "#PCDATA",
            repeat(seq(optional(/\s/), "|", optional(/\s/), $._name)),
            optional(/\s/),
            ")*"
          ),
          seq("(", optional(/\s/), "#PCDATA", optional(/\s/), ")")
        )
      ),

    /** Attribute-list Declaration **/

    attlist_decl: ($) =>
      seq(
        "<!",
        "ATTLIST",
        /\s/,
        alias($._name, $.attlist_name),
        repeat($.attribute_def),
        optional(/\s/),
        ">"
      ),

    attribute_def: ($) =>
      seq(
        /\s/,
        alias($._name, $.attribute_name),
        /\s/,
        $.attribute_type,
        /\s/,
        $.default_decl
      ),

    /** Attribute Types **/

    attribute_type: ($) =>
      choice($._string_type, $._tokenized_type, $._enumerated_type),

    // String attribute type: the keyword CDATA.
    _string_type: ($) => "CDATA",

    _tokenized_type: ($) =>
      choice(
        "ID",
        "IDREF",
        "IDREFS",
        "ENTITY",
        "ENTITIES",
        "NMTOKEN",
        "NMTOKENS"
      ),

    /** Enumerated Attribute Types **/

    _enumerated_type: ($) => choice($.notation_type, $.enumeration),

    notation_type: ($) =>
      seq(
        "NOTATION",
        /\s/,
        "(",
        optional(/\s/),
        alias($._name, $.notation_type_name),
        repeat(
          seq(
            optional(/\s/),
            "|",
            optional(/\s/),
            alias($._name, $.notation_type_name)
          )
        ),
        ")"
      ),

    enumeration: ($) =>
      seq(
        "(",
        optional(/\s/),
        $.nm_token,
        repeat(seq(optional(/\s/), "|", optional(/\s/), $.nm_token)),
        optional(/\s/),
        ")"
      ),

    /** Attribute Defaults **/

    default_decl: ($) =>
      choice(
        "#REQUIRED",
        "#IMPLIED",
        seq(optional(seq("#FIXED", optional(/\s/))), $.attribute_value)
      ),


    /** Character Reference **/

    char_ref: ($) =>
      choice(seq("&#", /[0-9]+/, ";"), seq("&#x", /[0-9a-fA-F]+/, ";")),

    /** Entity Reference **/

    reference: ($) => choice($.entity_ref, $.char_ref),

    entity_ref: ($) => seq("&", $._name, ";"),

    pe_reference: ($) => seq("%", $._name, ";"),

    /** Entity Declaration **/

    entity_decl: ($) => choice($.ge_decl, $.pe_decl),

    ge_decl: ($) =>
      seq(
        "<!",
        "ENTITY",
        /\s/,
        $._name,
        /\s/,
        $.entity_def,
        optional(/\s/),
        ">"
      ),

    pe_decl: ($) =>
      seq(
        "<!",
        "ENTITY",
        /\s/,
        "%",
        /\s/,
        $._name,
        /\s/,
        $.pe_def,
        optional(/\s/),
        ">"
      ),

    entity_def: ($) =>
      prec.left(
        choice($.entity_value, seq($.external_id, optional($.ndata_decl)))
      ),

    pe_def: ($) => choice($.entity_value, $.external_id),

    /** External Entity Declaration **/

    external_id: ($) =>
      choice(
        seq("SYSTEM", /\s/, $.system_literal),
        seq("PUBLIC", /\s/, $.pubid_literal, /\s/, $.system_literal)
      ),

    ndata_decl: ($) => seq(/\s/, "NDATA", /\s/, alias($._name, $.ndata_name)),


    /** Encoding Declaration **/

    encoding_decl: ($) =>
      seq(/\s/, "encoding", $._eq, alias($._enc_name, $.attribute_value)),

    _enc_name: ($) =>
      in_quotes(seq(/[A-Za-z]/, repeat(choice(/[A-Za-z0-9._]/, "-")))),

    /** Notation Declaration **/

    notation_decl: ($) =>
      seq(
        "<!",
        "NOTATION",
        /\s/,
        alias($._name, $.notation_name),
        /\s/,
        choice($.external_id, $.public_id),
        optional(/\s/),
        ">"
      ),

    public_id: ($) => prec.left(seq("PUBLIC", /\s/, $.pubid_literal)),
  },
});
