/* ox.c
 * Copyright (c) 2011, Peter Ohler
 * All rights reserved.
 */

#include "ox.h"

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "intern.h"
#include "ruby.h"
#include "sax.h"

/* maximum to allocate on the stack, arbitrary limit */
#define SMALL_XML 4096
#define WITH_CACHE_TESTS 0

typedef struct _yesNoOpt {
    VALUE sym;
    char *attr;
} *YesNoOpt;

void Init_ox();

VALUE Ox = Qnil;

ID ox_abort_id;
ID ox_at_column_id;
ID ox_at_content_id;
ID ox_at_id;
ID ox_at_line_id;
ID ox_at_pos_id;
ID ox_at_value_id;
ID ox_attr_id;
ID ox_attr_value_id;
ID ox_attributes_id;
ID ox_attrs_done_id;
ID ox_beg_id;
ID ox_bigdecimal_id;
ID ox_call_id;
ID ox_cdata_id;
ID ox_comment_id;
ID ox_den_id;
ID ox_doctype_id;
ID ox_end_element_id;
ID ox_end_id;
ID ox_end_instruct_id;
ID ox_error_id;
ID ox_excl_id;
ID ox_external_encoding_id;
ID ox_fileno_id;
ID ox_force_encoding_id;
ID ox_inspect_id;
ID ox_instruct_id;
ID ox_jd_id;
ID ox_keys_id;
ID ox_local_id;
ID ox_mesg_id;
ID ox_message_id;
ID ox_new_id;
ID ox_nodes_id;
ID ox_num_id;
ID ox_parse_id;
ID ox_pos_id;
ID ox_read_id;
ID ox_readpartial_id;
ID ox_start_element_id;
ID ox_string_id;
ID ox_text_id;
ID ox_to_c_id;
ID ox_value_id;

VALUE ox_encoding_sym;
VALUE ox_version_sym;
VALUE ox_standalone_sym;
VALUE ox_indent_sym;
VALUE ox_size_sym;

VALUE ox_empty_string;
VALUE ox_zero_fixnum;
VALUE ox_sym_bank;  // Array

VALUE ox_arg_error_class;
VALUE ox_bag_clas;
VALUE ox_cdata_clas;
VALUE ox_comment_clas;
VALUE ox_raw_clas;
VALUE ox_date_class;
VALUE ox_doctype_clas;
VALUE ox_document_clas;
VALUE ox_element_clas;
VALUE ox_instruct_clas;
VALUE ox_parse_error_class;
VALUE ox_stringio_class;
VALUE ox_struct_class;
VALUE ox_syntax_error_class;
VALUE ox_time_class;

SlotCache ox_class_cache = 0;

static VALUE abort_sym;
static VALUE active_sym;
static VALUE attr_key_mod_sym;
static VALUE auto_define_sym;
static VALUE auto_sym;
static VALUE block_sym;
static VALUE circular_sym;
static VALUE convert_special_sym;
static VALUE effort_sym;
static VALUE generic_sym;
static VALUE hash_no_attrs_sym;
static VALUE hash_sym;
static VALUE inactive_sym;
static VALUE invalid_replace_sym;
static VALUE limited_sym;
static VALUE margin_sym;
static VALUE mode_sym;
static VALUE nest_ok_sym;
static VALUE no_empty_sym;
static VALUE object_sym;
static VALUE off_sym;
static VALUE opt_format_sym;
static VALUE optimized_sym;
static VALUE overlay_sym;
static VALUE skip_none_sym;
static VALUE skip_off_sym;
static VALUE skip_return_sym;
static VALUE skip_sym;
static VALUE skip_white_sym;
static VALUE smart_sym;
static VALUE strict_sym;
static VALUE strip_namespace_sym;
static VALUE symbolize_keys_sym;
static VALUE symbolize_sym;
static VALUE tolerant_sym;
static VALUE trace_sym;
static VALUE with_cdata_sym;
static VALUE with_dtd_sym;
static VALUE with_instruct_sym;
static VALUE with_xml_sym;
static VALUE xsd_date_sym;
static VALUE element_key_mod_sym;

static ID encoding_id;
static ID has_key_id;

rb_encoding *ox_utf8_encoding = 0;

struct _options ox_default_options = {
    {'\0'},        // encoding
    {'\0'},        // margin
    2,             // indent
    0,             // trace
    0,             // margin_len
    No,            // with_dtd
    No,            // with_xml
    No,            // with_instruct
    No,            // circular
    No,            // xsd_date
    NoMode,        // mode
    StrictEffort,  // effort
    Yes,           // sym_keys
    SpcSkip,       // skip
    No,            // smart
    true,          // convert_special
    No,            // allow_invalid
    false,         // no_empty
    false,         // with_cdata
    {'\0'},        // inv_repl
    {'\0'},        // strip_ns
    NULL,          // html_hints
    Qnil,          // attr_key_mod;
    Qnil,          // element_key_mod;
    0              // rb_enc
};

extern ParseCallbacks ox_obj_callbacks;
extern ParseCallbacks ox_gen_callbacks;
extern ParseCallbacks ox_limited_callbacks;
extern ParseCallbacks ox_nomode_callbacks;
extern ParseCallbacks ox_hash_callbacks;
extern ParseCallbacks ox_hash_cdata_callbacks;
extern ParseCallbacks ox_hash_no_attrs_callbacks;
extern ParseCallbacks ox_hash_no_attrs_cdata_callbacks;

static void parse_dump_options(VALUE ropts, Options copts);

static char *defuse_bom(char *xml, Options options) {
    switch ((uint8_t)*xml) {
    case 0xEF:  // UTF-8
        if (0xBB == (uint8_t)xml[1] && 0xBF == (uint8_t)xml[2]) {
            options->rb_enc = ox_utf8_encoding;
            xml += 3;
        } else {
            rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n");
        }
        break;
#if 0
    case 0xFE: // UTF-16BE
	if (0xFF == (uint8_t)xml[1]) {
	    options->rb_enc = ox_utf16be_encoding;
	    xml += 2;
	} else {
	    rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n");
	}
	break;
    case 0xFF: // UTF-16LE or UTF-32LE
	if (0xFE == (uint8_t)xml[1]) {
	    if (0x00 == (uint8_t)xml[2] && 0x00 == (uint8_t)xml[3]) {
		options->rb_enc = ox_utf32le_encoding;
		xml += 4;
	    } else {
		options->rb_enc = ox_utf16le_encoding;
		xml += 2;
	    }
	} else {
	    rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n");
	}
	break;
    case 0x00: // UTF-32BE
	if (0x00 == (uint8_t)xml[1] && 0xFE == (uint8_t)xml[2] && 0xFF == (uint8_t)xml[3]) {
	    options->rb_enc = ox_utf32be_encoding;
	    xml += 4;
	} else {
	    rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n");
	}
	break;
#endif
    default:
        // Let it fail if there is a BOM that is not UTF-8. Other BOM options
        // are not ASCII compatible.
        break;
    }
    return xml;
}

static VALUE hints_to_overlay(Hints hints) {
    volatile VALUE overlay = rb_hash_new();
    Hint           h;
    int            i;
    VALUE          ov;

    for (i = hints->size, h = hints->hints; 0 < i; i--, h++) {
        switch (h->overlay) {
        case InactiveOverlay: ov = inactive_sym; break;
        case BlockOverlay: ov = block_sym; break;
        case OffOverlay: ov = off_sym; break;
        case AbortOverlay: ov = abort_sym; break;
        case NestOverlay: ov = nest_ok_sym; break;
        case ActiveOverlay:
        default: ov = active_sym; break;
        }
        rb_hash_aset(overlay, rb_str_new2(h->name), ov);
    }
    return overlay;
}

/* call-seq: default_options() => Hash
 *
 * Returns the default load and dump options as a Hash. The options are
 * - _:margin_ [String] left margin to inset when dumping
 * - _:indent_ [Fixnum] number of spaces to indent each element in an XML document
 * - _:trace_ [Fixnum] trace level where 0 is silent
 * - _:encoding_ [String] character encoding for the XML file
 * - _:with_dtd_ [true|false|nil] include DTD in the dump
 * - _:with_instruct_ [true|false|nil] include instructions in the dump
 * - _:with_xml_ [true|false|nil] include XML prolog in the dump
 * - _:circular_ [true|false|nil] support circular references while dumping
 * - _:xsd_date_ [true|false|nil] use XSD date format instead of decimal format
 * - _:mode_ [:object|:generic|:limited|:hash|:hash_no_attrs|nil] load method to use for XML
 * - _:effort_ [:strict|:tolerant|:auto_define] set the tolerance level for loading
 * - _:symbolize_keys_ [true|false|nil] symbolize element attribute keys or leave as Strings
 * - _:element_key_mod_ [Proc|nil] converts element keys on parse if not nil
 * - _:attr_key_mod_ [Proc|nil] converts attribute keys on parse if not nil
 * - _:skip_ [:skip_none|:skip_return|:skip_white|:skip_off] determines how to handle white space in text
 * - _:smart_ [true|false|nil] flag indicating the SAX parser uses hints if available (use with html)
 * - _:convert_special_ [true|false|nil] flag indicating special characters like &lt; are converted with the SAX parser
 * - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include anyway
 * as hex. A string, limited to 10 characters will replace the invalid character with the replace.
 * - _:no_empty_ [true|false|nil] flag indicating there should be no empty elements in a dump
 * - _:with_cdata_ [true|false] includes cdata in hash_load results
 * - _:strip_namespace_ [String|true|false] false or "" results in no namespace stripping. A string of "*" or true will
 * strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
 * - _:overlay_ [Hash] a Hash of keys that match html element names and values that are one of
 *   - _:active_ - make the normal callback for the element
 *   - _:nest_ok_ - active but the nesting check is ignored
 *   - _:inactive_ - do not make the element start, end, or attribute callbacks for this element only
 *   - _:block_ - block this and all children callbacks
 *   - _:off_ - block this element and it's children unless the child element is active
 *   - _:abort_ - abort the html processing and return
 *
 * *return* [Hash] all current option settings.
 *
 * Note that an indent of less than zero will result in a tight one line output
 * unless the text in the XML fields contain new line characters.
 */
static VALUE get_def_opts(VALUE self) {
    VALUE opts = rb_hash_new();
    int   elen = (int)strlen(ox_default_options.encoding);

    rb_hash_aset(opts, ox_encoding_sym, (0 == elen) ? Qnil : rb_str_new(ox_default_options.encoding, elen));
    rb_hash_aset(opts, margin_sym, rb_str_new(ox_default_options.margin, ox_default_options.margin_len));
    rb_hash_aset(opts, ox_indent_sym, INT2FIX(ox_default_options.indent));
    rb_hash_aset(opts, trace_sym, INT2FIX(ox_default_options.trace));
    rb_hash_aset(opts,
                 with_dtd_sym,
                 (Yes == ox_default_options.with_dtd) ? Qtrue : ((No == ox_default_options.with_dtd) ? Qfalse : Qnil));
    rb_hash_aset(opts,
                 with_xml_sym,
                 (Yes == ox_default_options.with_xml) ? Qtrue : ((No == ox_default_options.with_xml) ? Qfalse : Qnil));
    rb_hash_aset(
        opts,
        with_instruct_sym,
        (Yes == ox_default_options.with_instruct) ? Qtrue : ((No == ox_default_options.with_instruct) ? Qfalse : Qnil));
    rb_hash_aset(opts,
                 circular_sym,
                 (Yes == ox_default_options.circular) ? Qtrue : ((No == ox_default_options.circular) ? Qfalse : Qnil));
    rb_hash_aset(opts,
                 xsd_date_sym,
                 (Yes == ox_default_options.xsd_date) ? Qtrue : ((No == ox_default_options.xsd_date) ? Qfalse : Qnil));
    rb_hash_aset(opts,
                 symbolize_keys_sym,
                 (Yes == ox_default_options.sym_keys) ? Qtrue : ((No == ox_default_options.sym_keys) ? Qfalse : Qnil));
    rb_hash_aset(opts, attr_key_mod_sym, ox_default_options.attr_key_mod);
    rb_hash_aset(opts, element_key_mod_sym, ox_default_options.element_key_mod);
    rb_hash_aset(opts,
                 smart_sym,
                 (Yes == ox_default_options.smart) ? Qtrue : ((No == ox_default_options.smart) ? Qfalse : Qnil));
    rb_hash_aset(opts, convert_special_sym, (ox_default_options.convert_special) ? Qtrue : Qfalse);
    rb_hash_aset(opts, no_empty_sym, (ox_default_options.no_empty) ? Qtrue : Qfalse);
    rb_hash_aset(opts, with_cdata_sym, (ox_default_options.with_cdata) ? Qtrue : Qfalse);
    switch (ox_default_options.mode) {
    case ObjMode: rb_hash_aset(opts, mode_sym, object_sym); break;
    case GenMode: rb_hash_aset(opts, mode_sym, generic_sym); break;
    case LimMode: rb_hash_aset(opts, mode_sym, limited_sym); break;
    case HashMode: rb_hash_aset(opts, mode_sym, hash_sym); break;
    case HashNoAttrMode: rb_hash_aset(opts, mode_sym, hash_no_attrs_sym); break;
    case NoMode:
    default: rb_hash_aset(opts, mode_sym, Qnil); break;
    }
    switch (ox_default_options.effort) {
    case StrictEffort: rb_hash_aset(opts, effort_sym, strict_sym); break;
    case TolerantEffort: rb_hash_aset(opts, effort_sym, tolerant_sym); break;
    case AutoEffort: rb_hash_aset(opts, effort_sym, auto_define_sym); break;
    case NoEffort:
    default: rb_hash_aset(opts, effort_sym, Qnil); break;
    }
    switch (ox_default_options.skip) {
    case OffSkip: rb_hash_aset(opts, skip_sym, skip_off_sym); break;
    case NoSkip: rb_hash_aset(opts, skip_sym, skip_none_sym); break;
    case CrSkip: rb_hash_aset(opts, skip_sym, skip_return_sym); break;
    case SpcSkip: rb_hash_aset(opts, skip_sym, skip_white_sym); break;
    default: rb_hash_aset(opts, skip_sym, Qnil); break;
    }
    if (Yes == ox_default_options.allow_invalid) {
        rb_hash_aset(opts, invalid_replace_sym, Qnil);
    } else {
        rb_hash_aset(opts,
                     invalid_replace_sym,
                     rb_str_new(ox_default_options.inv_repl + 1, (int)*ox_default_options.inv_repl));
    }
    if ('\0' == *ox_default_options.strip_ns) {
        rb_hash_aset(opts, strip_namespace_sym, Qfalse);
    } else if ('*' == *ox_default_options.strip_ns && '\0' == ox_default_options.strip_ns[1]) {
        rb_hash_aset(opts, strip_namespace_sym, Qtrue);
    } else {
        rb_hash_aset(opts,
                     strip_namespace_sym,
                     rb_str_new(ox_default_options.strip_ns, strlen(ox_default_options.strip_ns)));
    }
    if (NULL == ox_default_options.html_hints) {
        // rb_hash_aset(opts, overlay_sym, hints_to_overlay(ox_hints_html()));
        rb_hash_aset(opts, overlay_sym, Qnil);
    } else {
        rb_hash_aset(opts, overlay_sym, hints_to_overlay(ox_default_options.html_hints));
    }
    return opts;
}

static int set_overlay(VALUE key, VALUE value, VALUE ctx) {
    Hints hints = (Hints)ctx;
    Hint  hint;

    if (NULL != (hint = ox_hint_find(hints, StringValuePtr(key)))) {
        if (active_sym == value) {
            hint->overlay = ActiveOverlay;
        } else if (inactive_sym == value) {
            hint->overlay = InactiveOverlay;
        } else if (block_sym == value) {
            hint->overlay = BlockOverlay;
        } else if (nest_ok_sym == value) {
            hint->overlay = NestOverlay;
        } else if (off_sym == value) {
            hint->overlay = OffOverlay;
        } else if (abort_sym == value) {
            hint->overlay = AbortOverlay;
        }
    }
    return ST_CONTINUE;
}

/* call-seq: sax_html_overlay() => Hash
 *
 * Returns an overlay hash that can be modified and used as an overlay in the
 * default options or in the sax_html() function call. Values for the keys are:
 *   - _:active_ - make the normal callback for the element
 *   - _:nest_ok_ - active but ignore nest check
 *   - _:inactive_ - do not make the element start, end, or attribute callbacks for this element only
 *   - _:block_ - block this and all children callbacks
 *   - _:off_ - block this element and it's children unless the child element is active
 *   - _:abort_ - abort the html processing and return
 *
 * *return* [Hash] default SAX HTML settings
 */
static VALUE sax_html_overlay(VALUE self) {
    return hints_to_overlay(ox_hints_html());
}

/* call-seq: default_options=(opts)
 *
 * Sets the default options for load and dump.
 * - +opts+ [Hash] opts options to change
 *   - _:margin_ [String] left margin to inset when dumping
 *   - _:indent_ [Fixnum] number of spaces to indent each element in an XML document
 *   - _:trace_ [Fixnum] trace level where 0 is silent
 *   - _:encoding_ [String] character encoding for the XML file
 *   - _:with_dtd_ [true|false|nil] include DTD in the dump
 *   - _:with_instruct_ [true|false|nil] include instructions in the dump
 *   - _:with_xml_ [true|false|nil] include XML prolog in the dump
 *   - _:circular_ [true|false|nil] support circular references while dumping
 *   - _:xsd_date_ [true|false|nil] use XSD date format instead of decimal format
 *   - _:mode_ [:object|:generic|:limited|:hash|:hash_no_attrs|nil] load method to use for XML
 *   - _:effort_ [:strict|:tolerant|:auto_define] set the tolerance level for loading
 *   - _:symbolize_keys_ [true|false|nil] symbolize element attribute keys or leave as Strings
 *   - _:element_key_mod_ [Proc|nil] converts element keys on parse if not nil
 *   - _:attr_key_mod_ [Proc|nil] converts attribute keys on parse if not nil
 *   - _:skip_ [:skip_none|:skip_return|:skip_white|:skip_off] determines how to handle white space in text
 *   - _:smart_ [true|false|nil] flag indicating the SAX parser uses hints if available (use with html)
 *   - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include
 * anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
 *   - _:strip_namespace_ [nil|String|true|false] "" or false result in no namespace stripping. A string of "*" or true
 * will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
 * - _:with_cdata_ [true|false] includes cdata in hash_load results
 * - _:overlay_ [Hash] a Hash of keys that match html element names and values that are one of
 *   - _:active_ - make the normal callback for the element
 *   - _:nest_ok_ - active but ignore nest check
 *   - _:inactive_ - do not make the element start, end, or attribute callbacks for this element only
 *   - _:block_ - block this and all children callbacks
 *   - _:off_ - block this element and it's children unless the child element is active
 *   - _:abort_ - abort the html processing and return
 *
 * *return* [nil]
 */
static VALUE set_def_opts(VALUE self, VALUE opts) {
    struct _yesNoOpt ynos[] = {{with_xml_sym, &ox_default_options.with_xml},
                               {with_dtd_sym, &ox_default_options.with_dtd},
                               {with_instruct_sym, &ox_default_options.with_instruct},
                               {xsd_date_sym, &ox_default_options.xsd_date},
                               {circular_sym, &ox_default_options.circular},
                               {symbolize_keys_sym, &ox_default_options.sym_keys},
                               {smart_sym, &ox_default_options.smart},
                               {Qnil, 0}};
    YesNoOpt         o;
    VALUE            v;

    Check_Type(opts, T_HASH);

    v = rb_hash_aref(opts, ox_encoding_sym);
    if (Qnil == v) {
        *ox_default_options.encoding = '\0';
    } else {
        Check_Type(v, T_STRING);
        strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1);
        ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
    }

    v = rb_hash_aref(opts, ox_indent_sym);
    if (Qnil != v) {
        Check_Type(v, T_FIXNUM);
        ox_default_options.indent = FIX2INT(v);
    }

    v = rb_hash_aref(opts, trace_sym);
    if (Qnil != v) {
        Check_Type(v, T_FIXNUM);
        ox_default_options.trace = FIX2INT(v);
    }

    v = rb_hash_aref(opts, mode_sym);
    if (Qnil == v) {
        ox_default_options.mode = NoMode;
    } else if (object_sym == v) {
        ox_default_options.mode = ObjMode;
    } else if (generic_sym == v) {
        ox_default_options.mode = GenMode;
    } else if (limited_sym == v) {
        ox_default_options.mode = LimMode;
    } else if (hash_sym == v) {
        ox_default_options.mode = HashMode;
    } else if (hash_no_attrs_sym == v) {
        ox_default_options.mode = HashNoAttrMode;
    } else {
        rb_raise(ox_parse_error_class, ":mode must be :object, :generic, :limited, :hash, :hash_no_attrs, or nil.\n");
    }

    v = rb_hash_aref(opts, effort_sym);
    if (Qnil == v) {
        ox_default_options.effort = NoEffort;
    } else if (strict_sym == v) {
        ox_default_options.effort = StrictEffort;
    } else if (tolerant_sym == v) {
        ox_default_options.effort = TolerantEffort;
    } else if (auto_define_sym == v) {
        ox_default_options.effort = AutoEffort;
    } else {
        rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, :auto_define, or nil.\n");
    }

    v = rb_hash_aref(opts, skip_sym);
    if (Qnil == v) {
        ox_default_options.skip = NoSkip;
    } else if (skip_off_sym == v) {
        ox_default_options.skip = OffSkip;
    } else if (skip_none_sym == v) {
        ox_default_options.skip = NoSkip;
    } else if (skip_return_sym == v) {
        ox_default_options.skip = CrSkip;
    } else if (skip_white_sym == v) {
        ox_default_options.skip = SpcSkip;
    } else {
        rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, :skip_off, or nil.\n");
    }

    v = rb_hash_lookup(opts, convert_special_sym);
    if (Qnil == v) {
        // no change
    } else if (Qtrue == v) {
        ox_default_options.convert_special = 1;
    } else if (Qfalse == v) {
        ox_default_options.convert_special = 0;
    } else {
        rb_raise(ox_parse_error_class, ":convert_special must be true or false.\n");
    }

    v = rb_hash_lookup(opts, no_empty_sym);
    if (Qnil == v) {
        // no change
    } else if (Qtrue == v) {
        ox_default_options.no_empty = 1;
    } else if (Qfalse == v) {
        ox_default_options.no_empty = 0;
    } else {
        rb_raise(ox_parse_error_class, ":no_empty must be true or false.\n");
    }

    v = rb_hash_aref(opts, invalid_replace_sym);
    if (Qnil == v) {
        ox_default_options.allow_invalid = Yes;
    } else {
        long slen;

        Check_Type(v, T_STRING);
        slen = RSTRING_LEN(v);
        if (sizeof(ox_default_options.inv_repl) - 2 < (size_t)slen) {
            rb_raise(ox_parse_error_class,
                     ":invalid_replace can be no longer than %d characters.",
                     (int)sizeof(ox_default_options.inv_repl) - 2);
        }
        strncpy(ox_default_options.inv_repl + 1, StringValuePtr(v), sizeof(ox_default_options.inv_repl) - 1);
        ox_default_options.inv_repl[sizeof(ox_default_options.inv_repl) - 1] = '\0';
        *ox_default_options.inv_repl                                         = (char)slen;
        ox_default_options.allow_invalid                                     = No;
    }

    v = rb_hash_aref(opts, strip_namespace_sym);
    if (Qfalse == v) {
        *ox_default_options.strip_ns = '\0';
    } else if (Qtrue == v) {
        *ox_default_options.strip_ns   = '*';
        ox_default_options.strip_ns[1] = '\0';
    } else if (Qnil != v) {
        long slen;

        Check_Type(v, T_STRING);
        slen = RSTRING_LEN(v);
        if (sizeof(ox_default_options.strip_ns) - 1 < (size_t)slen) {
            rb_raise(ox_parse_error_class,
                     ":strip_namespace can be no longer than %d characters.",
                     (int)sizeof(ox_default_options.strip_ns) - 1);
        }
        strncpy(ox_default_options.strip_ns, StringValuePtr(v), sizeof(ox_default_options.strip_ns) - 1);
        ox_default_options.strip_ns[sizeof(ox_default_options.strip_ns) - 1] = '\0';
    }

    v = rb_hash_aref(opts, margin_sym);
    if (Qnil != v) {
        long slen;

        Check_Type(v, T_STRING);
        slen = RSTRING_LEN(v);
        if (sizeof(ox_default_options.margin) - 1 < (size_t)slen) {
            rb_raise(ox_parse_error_class,
                     ":margin can be no longer than %d characters.",
                     (int)sizeof(ox_default_options.margin) - 1);
        }
        strncpy(ox_default_options.margin, StringValuePtr(v), sizeof(ox_default_options.margin) - 1);
        ox_default_options.margin[sizeof(ox_default_options.margin) - 1] = '\0';
        ox_default_options.margin_len                                    = strlen(ox_default_options.margin);
    }

    for (o = ynos; 0 != o->attr; o++) {
        v = rb_hash_lookup(opts, o->sym);
        if (Qnil == v) {
            *o->attr = NotSet;
        } else if (Qtrue == v) {
            *o->attr = Yes;
        } else if (Qfalse == v) {
            *o->attr = No;
        } else {
            rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym)));
        }
    }
    v = rb_hash_aref(opts, overlay_sym);
    if (Qnil == v) {
        ox_hints_destroy(ox_default_options.html_hints);
        ox_default_options.html_hints = NULL;
    } else {
        int cnt;

        Check_Type(v, T_HASH);
        cnt = (int)RHASH_SIZE(v);
        if (0 == cnt) {
            ox_hints_destroy(ox_default_options.html_hints);
            ox_default_options.html_hints = NULL;
        } else {
            ox_hints_destroy(ox_default_options.html_hints);
            ox_default_options.html_hints = ox_hints_dup(ox_hints_html());
            rb_hash_foreach(v, set_overlay, (VALUE)ox_default_options.html_hints);
        }
    }
    if (Qnil != (v = rb_hash_lookup(opts, with_cdata_sym))) {
        ox_default_options.with_cdata = (Qtrue == v);
    }

    ox_default_options.element_key_mod = rb_hash_lookup2(opts, element_key_mod_sym, ox_default_options.element_key_mod);
    ox_default_options.attr_key_mod    = rb_hash_lookup2(opts, attr_key_mod_sym, ox_default_options.attr_key_mod);

    return Qnil;
}

/* call-seq: parse_obj(xml) => Object
 *
 * Parses an XML document String that is in the object format and returns an
 * Object of the type represented by the XML. This function expects an
 * optimized XML formated String. For other formats use the more generic
 * Ox.load() method.  Raises an exception if the XML is malformed or the
 * classes specified in the file are not valid.
 * - +xml+ [String] XML String in optimized Object format.
 * *return* [Object] deserialized Object.
 */
static VALUE to_obj(VALUE self, VALUE ruby_xml) {
    char           *xml, *x;
    size_t          len;
    VALUE           obj;
    struct _options options = ox_default_options;
    struct _err     err;

    err_init(&err);
    Check_Type(ruby_xml, T_STRING);
    /* the xml string gets modified so make a copy of it */
    len = RSTRING_LEN(ruby_xml) + 1;
    x   = defuse_bom(StringValuePtr(ruby_xml), &options);
    if (SMALL_XML < len) {
        xml = ALLOC_N(char, len);
    } else {
        xml = ALLOCA_N(char, len);
    }
    memcpy(xml, x, len);
    rb_gc_disable();
    obj = ox_parse(xml, len - 1, ox_obj_callbacks, 0, &options, &err);
    if (SMALL_XML < len) {
        xfree(xml);
    }
    RB_GC_GUARD(obj);
    rb_gc_enable();
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return obj;
}

/* call-seq: parse(xml) => Ox::Document or Ox::Element
 *
 * Parses and XML document String into an Ox::Document or Ox::Element.
 * - +xml+ [String] xml XML String
 * *return* [Ox::Document or Ox::Element] parsed XML document.
 *
 * _raise_ [Exception] if the XML is malformed.
 */
static VALUE to_gen(VALUE self, VALUE ruby_xml) {
    char           *xml, *x;
    size_t          len;
    VALUE           obj;
    struct _options options = ox_default_options;
    struct _err     err;

    err_init(&err);
    Check_Type(ruby_xml, T_STRING);
    /* the xml string gets modified so make a copy of it */
    len = RSTRING_LEN(ruby_xml) + 1;
    x   = defuse_bom(StringValuePtr(ruby_xml), &options);
    if (SMALL_XML < len) {
        xml = ALLOC_N(char, len);
    } else {
        xml = ALLOCA_N(char, len);
    }
    memcpy(xml, x, len);
    obj = ox_parse(xml, len - 1, ox_gen_callbacks, 0, &options, &err);
    if (SMALL_XML < len) {
        xfree(xml);
    }
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return obj;
}

static int load_options_cb(VALUE k, VALUE v, VALUE opts) {
    Options copts = (Options)opts;

    if (mode_sym == k) {
        if (object_sym == v) {
            copts->mode = ObjMode;
        } else if (optimized_sym == v) {
            copts->mode = ObjMode;
        } else if (generic_sym == v) {
            copts->mode = GenMode;
        } else if (limited_sym == v) {
            copts->mode = LimMode;
        } else if (hash_sym == v) {
            copts->mode = HashMode;
        } else if (hash_no_attrs_sym == v) {
            copts->mode = HashNoAttrMode;
        } else {
            rb_raise(ox_parse_error_class, ":mode must be :generic, :object, :limited, :hash, :hash_no_attrs.\n");
        }
    } else if (effort_sym == k) {
        if (auto_define_sym == v) {
            copts->effort = AutoEffort;
        } else if (tolerant_sym == v) {
            copts->effort = TolerantEffort;
        } else if (strict_sym == v) {
            copts->effort = StrictEffort;
        } else {
            rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
        }
    } else if (skip_sym == k) {
        if (skip_none_sym == v) {
            copts->skip = NoSkip;
        } else if (skip_off_sym == v) {
            copts->skip = OffSkip;
        } else if (skip_return_sym == v) {
            copts->skip = CrSkip;
        } else if (skip_white_sym == v) {
            copts->skip = SpcSkip;
        } else {
            rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, or :skip_off.\n");
        }
    } else if (trace_sym == k) {
        Check_Type(v, T_FIXNUM);
        copts->trace = FIX2INT(v);
    } else if (symbolize_keys_sym == k) {
        copts->sym_keys = (Qfalse == v) ? No : Yes;
    } else if (element_key_mod_sym == k) {
        copts->element_key_mod = v;
    } else if (attr_key_mod_sym == k) {
        copts->attr_key_mod = v;
    } else if (convert_special_sym == k) {
        copts->convert_special = (Qfalse != v);
    } else if (no_empty_sym == k) {
        copts->no_empty = (Qfalse != v);
    } else if (invalid_replace_sym == k) {
        if (Qnil == v) {
            copts->allow_invalid = Yes;
        } else {
            long slen;

            Check_Type(v, T_STRING);
            slen = RSTRING_LEN(v);
            if (sizeof(copts->inv_repl) - 2 < (size_t)slen) {
                rb_raise(ox_parse_error_class,
                         ":invalid_replace can be no longer than %d characters.",
                         (int)sizeof(copts->inv_repl) - 2);
            }
            strncpy(copts->inv_repl + 1, StringValuePtr(v), sizeof(copts->inv_repl) - 1);
            copts->inv_repl[sizeof(copts->inv_repl) - 1] = '\0';
            *copts->inv_repl                             = (char)slen;
            copts->allow_invalid                         = No;
        }
    } else if (strip_namespace_sym == k) {
        if (Qfalse == v) {
            *copts->strip_ns = '\0';
        } else if (Qtrue == v) {
            *copts->strip_ns   = '*';
            copts->strip_ns[1] = '\0';
        } else if (Qnil != v) {
            long slen;

            Check_Type(v, T_STRING);
            slen = RSTRING_LEN(v);
            if (sizeof(copts->strip_ns) - 1 < (size_t)slen) {
                rb_raise(ox_parse_error_class,
                         ":strip_namespace can be no longer than %d characters.",
                         (int)sizeof(copts->strip_ns) - 1);
            }
            strncpy(copts->strip_ns, StringValuePtr(v), sizeof(copts->strip_ns) - 1);
            copts->strip_ns[sizeof(copts->strip_ns) - 1] = '\0';
        }
    } else if (margin_sym == k) {
        long slen;

        Check_Type(v, T_STRING);
        slen = RSTRING_LEN(v);
        if (sizeof(copts->margin) - 1 < (size_t)slen) {
            rb_raise(ox_parse_error_class,
                     ":margin can be no longer than %d characters.",
                     (int)sizeof(copts->margin) - 1);
        }
        strncpy(copts->margin, StringValuePtr(v), sizeof(copts->margin) - 1);
        copts->margin[sizeof(copts->margin) - 1] = '\0';
        copts->margin_len                        = strlen(copts->margin);
    } else if (with_cdata_sym == k) {
        copts->with_cdata = (Qtrue == v);
    }

    return ST_CONTINUE;
}

static VALUE load(char *xml, size_t len, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
    VALUE           obj;
    struct _options options = ox_default_options;

    if (1 == argc && rb_cHash == rb_obj_class(*argv)) {
        rb_hash_foreach(*argv, load_options_cb, (VALUE)&options);
    }
    if ('\0' == *options.encoding) {
        if (Qnil != encoding) {
            options.rb_enc = rb_enc_from_index(rb_enc_get_index(encoding));
        } else {
            options.rb_enc = 0;
        }
    } else if (0 == options.rb_enc) {
        options.rb_enc = rb_enc_find(options.encoding);
    }
    xml = defuse_bom(xml, &options);
    switch (options.mode) {
    case ObjMode:
        rb_gc_disable();
        obj = ox_parse(xml, len, ox_obj_callbacks, 0, &options, err);
        RB_GC_GUARD(obj);
        rb_gc_enable();
        break;
    case GenMode: obj = ox_parse(xml, len, ox_gen_callbacks, 0, &options, err); break;
    case LimMode: obj = ox_parse(xml, len, ox_limited_callbacks, 0, &options, err); break;
    case HashMode:
        if (options.with_cdata) {
            obj = ox_parse(xml, len, ox_hash_cdata_callbacks, 0, &options, err);
        } else {
            obj = ox_parse(xml, len, ox_hash_callbacks, 0, &options, err);
        }
        break;
    case HashNoAttrMode:
        if (options.with_cdata) {
            obj = ox_parse(xml, len, ox_hash_no_attrs_cdata_callbacks, 0, &options, err);
        } else {
            obj = ox_parse(xml, len, ox_hash_no_attrs_callbacks, 0, &options, err);
        }
        break;
    case NoMode: obj = ox_parse(xml, len, ox_nomode_callbacks, 0, &options, err); break;
    default: obj = ox_parse(xml, len, ox_gen_callbacks, 0, &options, err); break;
    }
    return obj;
}

/* call-seq: load(xml, options) => Ox::Document or Ox::Element or Object
 *
 * Parses and XML document String into an Ox::Document, or Ox::Element, or
 * Object depending on the options.  Raises an exception if the XML is malformed
 * or the classes specified are not valid. If a block is given it will be called
 * on the completion of each complete top level entity with that entity as it's
 * only argument.
 *
 * - +xml+ [String] XML String
 * - +options+ [Hash] load options
 *   - *:mode* [:object|:generic|:limited] format expected
 *     - _:object_ - object format
 *     - _:generic_ - read as a generic XML file
 *     - _:limited_ - read as a generic XML file but with callbacks on text and elements events only
 *     - _:hash_ - read and convert to a Hash and core class objects only
 *     - _:hash_no_attrs_ - read and convert to a Hash and core class objects only without capturing attributes
 *   - *:effort* [:strict|:tolerant|:auto_define] effort to use when an undefined class is encountered, default: :strict
 *     - _:strict_ - raise an NameError for missing classes and modules
 *     - _:tolerant_ - return nil for missing classes and modules
 *     - _:auto_define_ - auto define missing classes and modules
 *   - *:trace* [Fixnum] trace level as a Fixnum, default: 0 (silent)
 *   - *:symbolize_keys* [true|false|nil] symbolize element attribute keys or leave as Strings
 *   - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
 * anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
 *   - *:strip_namespace* [String|true|false] "" or false result in no namespace stripping. A string of "*" or true will
 * strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
 *   - *:with_cdata* [true|false] if true cdata is included in hash_load output otherwise it is not.
 */
static VALUE load_str(int argc, VALUE *argv, VALUE self) {
    char       *xml;
    size_t      len;
    VALUE       obj;
    VALUE       encoding;
    struct _err err;

    err_init(&err);
    Check_Type(*argv, T_STRING);
    /* the xml string gets modified so make a copy of it */
    len = RSTRING_LEN(*argv) + 1;
    if (SMALL_XML < len) {
        xml = ALLOC_N(char, len);
    } else {
        xml = ALLOCA_N(char, len);
    }
    encoding = rb_obj_encoding(*argv);
    memcpy(xml, StringValuePtr(*argv), len);
    xml[len - 1] = '\0';
    obj          = load(xml, len - 1, argc - 1, argv + 1, self, encoding, &err);
    if (SMALL_XML < len) {
        xfree(xml);
    }
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return obj;
}

/* call-seq: load_file(file_path, options) => Ox::Document or Ox::Element or Object
 *
 * Parses and XML document from a file into an Ox::Document, or Ox::Element,
 * or Object depending on the options.	Raises an exception if the XML is
 * malformed or the classes specified are not valid.
 * - +file_path+ [String] file path to read the XML document from
 * - +options+ [Hash] load options
 *   - *:mode* [:object|:generic|:limited] format expected
 *     - _:object_ - object format
 *     - _:generic_ - read as a generic XML file
 *     - _:limited_ - read as a generic XML file but with callbacks on text and elements events only
 *     - _:hash_ - read and convert to a Hash and core class objects only
 *     - _:hash_no_attrs_ - read and convert to a Hash and core class objects only without capturing attributes
 *   - *:effort* [:strict|:tolerant|:auto_define] effort to use when an undefined class is encountered, default: :strict
 *     - _:strict_ - raise an NameError for missing classes and modules
 *     - _:tolerant_ - return nil for missing classes and modules
 *     - _:auto_define_ - auto define missing classes and modules
 *   - *:trace* [Fixnum] trace level as a Fixnum, default: 0 (silent)
 *   - *:symbolize_keys* [true|false|nil] symbolize element attribute keys or leave as Strings
 *   - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
 * anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
 *   - *:strip_namespace* [String|true|false] "" or false result in no namespace stripping. A string of "*" or true will
 * strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
 */
static VALUE load_file(int argc, VALUE *argv, VALUE self) {
    char       *path;
    char       *xml;
    FILE       *f;
    off_t       len;
    VALUE       obj;
    struct _err err;

    err_init(&err);
    Check_Type(*argv, T_STRING);
    path = StringValuePtr(*argv);
    if (0 == (f = fopen(path, "r"))) {
        rb_raise(rb_eIOError, "%s\n", strerror(errno));
    }
    fseek(f, 0, SEEK_END);
    len = ftello(f);
    if (SMALL_XML < len) {
        xml = ALLOC_N(char, len + 1);
    } else {
        xml = ALLOCA_N(char, len + 1);
    }
    fseek(f, 0, SEEK_SET);
    if ((size_t)len != fread(xml, 1, len, f)) {
        ox_err_set(&err, rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
        obj = Qnil;
    } else {
        xml[len] = '\0';
        obj      = load(xml, len, argc - 1, argv + 1, self, Qnil, &err);
    }
    fclose(f);
    if (SMALL_XML < len) {
        xfree(xml);
    }
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return obj;
}

/* call-seq: sax_parse(handler, io, options)
 *
 * Parses an IO stream or file containing an XML document. Raises an exception
 * if the XML is malformed or the classes specified are not valid.
 * - +handler+ [Ox::Sax] SAX (responds to OX::Sax methods) like handler
 * - +io+ [IO|String] IO Object to read from
 * - +options+ [Hash] options parse options
 *   - *:convert_special* [true|false] flag indicating special characters like &lt; are converted
 *   - *:symbolize* [true|false] flag indicating the parser symbolize element and attribute names
 *   - *:smart* [true|false] flag indicating the parser uses hints if available (use with html)
 *   - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collpase white
 * space into a single space. Default (skip space)
 *   - *:strip_namespace* [nil|String|true|false] "" or false result in no namespace stripping. A string of "*" or true
 * will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
 */
static VALUE sax_parse(int argc, VALUE *argv, VALUE self) {
    struct _saxOptions options;

    options.symbolize       = (No != ox_default_options.sym_keys);
    options.convert_special = ox_default_options.convert_special;
    options.smart           = (Yes == ox_default_options.smart);
    options.skip            = ox_default_options.skip;
    options.hints           = NULL;
    strcpy(options.strip_ns, ox_default_options.strip_ns);

    if (argc < 2) {
        rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
    }
    if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
        VALUE h = argv[2];
        VALUE v;

        if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
            options.convert_special = (Qtrue == v);
        }
        if (Qnil != (v = rb_hash_lookup(h, smart_sym))) {
            options.smart = (Qtrue == v);
        }
        if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
            options.symbolize = (Qtrue == v);
        }
        if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
            if (skip_return_sym == v) {
                options.skip = CrSkip;
            } else if (skip_white_sym == v) {
                options.skip = SpcSkip;
            } else if (skip_none_sym == v) {
                options.skip = NoSkip;
            } else if (skip_off_sym == v) {
                options.skip = OffSkip;
            }
        }
        if (Qnil != (v = rb_hash_lookup(h, strip_namespace_sym))) {
            if (Qfalse == v) {
                *options.strip_ns = '\0';
            } else if (Qtrue == v) {
                *options.strip_ns   = '*';
                options.strip_ns[1] = '\0';
            } else {
                long slen;

                Check_Type(v, T_STRING);
                slen = RSTRING_LEN(v);
                if (sizeof(options.strip_ns) - 1 < (size_t)slen) {
                    rb_raise(ox_parse_error_class,
                             ":strip_namespace can be no longer than %d characters.",
                             (int)sizeof(options.strip_ns) - 1);
                }
                strncpy(options.strip_ns, StringValuePtr(v), sizeof(options.strip_ns) - 1);
                options.strip_ns[sizeof(options.strip_ns) - 1] = '\0';
            }
        }
    }
    ox_sax_parse(argv[0], argv[1], &options);

    return Qnil;
}

/* call-seq: sax_html(handler, io, options)
 *
 * Parses an IO stream or file containing an XML document. Raises an exception
 * if the XML is malformed or the classes specified are not valid.
 * - +handler+ [Ox::Sax] SAX (responds to OX::Sax methods) like handler
 * - +io+ [IO|String] IO Object to read from
 * - +options+ [Hash] options parse options
 *   - *:convert_special* [true|false] flag indicating special characters like &lt; are converted
 *   - *:symbolize* [true|false] flag indicating the parser symbolize element and attribute names
 *   - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collapse white
 * space into a single space. Default (skip space)
 *   - *:overlay* [Hash] a Hash of keys that match html element names and values that are one of
 *     - _:active_ - make the normal callback for the element
 *     - _:nest_ok_ - active but ignore nest check
 *     - _:inactive_ - do not make the element start, end, or attribute callbacks for this element only
 *     - _:block_ - block this and all children callbacks
 *     - _:off_ - block this element and it's children unless the child element is active
 *     - _:abort_ - abort the html processing and return
 */
static VALUE sax_html(int argc, VALUE *argv, VALUE self) {
    struct _saxOptions options;
    bool               free_hints = false;

    options.symbolize       = (No != ox_default_options.sym_keys);
    options.convert_special = ox_default_options.convert_special;
    options.smart           = true;
    options.skip            = ox_default_options.skip;
    options.hints           = ox_default_options.html_hints;
    if (NULL == options.hints) {
        options.hints = ox_hints_html();
    }
    *options.strip_ns = '\0';

    if (argc < 2) {
        rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_html.\n");
    }
    if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
        volatile VALUE h = argv[2];
        volatile VALUE v;

        if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
            options.convert_special = (Qtrue == v);
        }
        if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
            options.symbolize = (Qtrue == v);
        }
        if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
            if (skip_return_sym == v) {
                options.skip = CrSkip;
            } else if (skip_white_sym == v) {
                options.skip = SpcSkip;
            } else if (skip_none_sym == v) {
                options.skip = NoSkip;
            } else if (skip_off_sym == v) {
                options.skip = OffSkip;
            }
        }
        if (Qnil != (v = rb_hash_lookup(h, overlay_sym))) {
            int cnt;

            Check_Type(v, T_HASH);
            cnt = (int)RHASH_SIZE(v);
            if (0 == cnt) {
                options.hints = ox_hints_html();
            } else {
                options.hints = ox_hints_dup(options.hints);
                free_hints    = true;
                rb_hash_foreach(v, set_overlay, (VALUE)options.hints);
            }
        }
    }
    ox_sax_parse(argv[0], argv[1], &options);
    if (free_hints) {
        ox_hints_destroy(options.hints);
    }
    return Qnil;
}

static void parse_dump_options(VALUE ropts, Options copts) {
    struct _yesNoOpt ynos[] = {{with_xml_sym, &copts->with_xml},
                               {with_dtd_sym, &copts->with_dtd},
                               {with_instruct_sym, &copts->with_instruct},
                               {xsd_date_sym, &copts->xsd_date},
                               {circular_sym, &copts->circular},
                               {Qnil, 0}};
    YesNoOpt         o;

    if (rb_cHash == rb_obj_class(ropts)) {
        VALUE v;

        if (Qnil != (v = rb_hash_lookup(ropts, ox_indent_sym))) {
            if (rb_cInteger != rb_obj_class(v) && T_FIXNUM != rb_type(v)) {
                rb_raise(ox_parse_error_class, ":indent must be a Fixnum.\n");
            }
            copts->indent = NUM2INT(v);
        }
        if (Qnil != (v = rb_hash_lookup(ropts, trace_sym))) {
            if (rb_cInteger != rb_obj_class(v) && T_FIXNUM != rb_type(v)) {
                rb_raise(ox_parse_error_class, ":trace must be a Fixnum.\n");
            }
            copts->trace = NUM2INT(v);
        }
        if (Qnil != (v = rb_hash_lookup(ropts, ox_encoding_sym))) {
            if (rb_cString != rb_obj_class(v)) {
                rb_raise(ox_parse_error_class, ":encoding must be a String.\n");
            }
            strncpy(copts->encoding, StringValuePtr(v), sizeof(copts->encoding) - 1);
        }
        if (Qnil != (v = rb_hash_lookup(ropts, no_empty_sym))) {
            copts->no_empty = (v == Qtrue);
        }
        if (Qnil != (v = rb_hash_lookup(ropts, effort_sym))) {
            if (auto_define_sym == v) {
                copts->effort = AutoEffort;
            } else if (tolerant_sym == v) {
                copts->effort = TolerantEffort;
            } else if (strict_sym == v) {
                copts->effort = StrictEffort;
            } else {
                rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
            }
        }
        v = rb_hash_lookup(ropts, invalid_replace_sym);
        if (Qnil == v) {
            if (Qtrue == rb_funcall(ropts, has_key_id, 1, invalid_replace_sym)) {
                copts->allow_invalid = Yes;
            }
        } else {
            long slen;

            Check_Type(v, T_STRING);
            slen = RSTRING_LEN(v);
            if (sizeof(copts->inv_repl) - 2 < (size_t)slen) {
                rb_raise(ox_parse_error_class,
                         ":invalid_replace can be no longer than %d characters.",
                         (int)sizeof(copts->inv_repl) - 2);
            }
            strncpy(copts->inv_repl + 1, StringValuePtr(v), sizeof(copts->inv_repl) - 1);
            copts->inv_repl[sizeof(copts->inv_repl) - 1] = '\0';
            *copts->inv_repl                             = (char)slen;
            copts->allow_invalid                         = No;
        }
        v = rb_hash_lookup(ropts, margin_sym);
        if (Qnil != v) {
            long slen;

            Check_Type(v, T_STRING);
            slen = RSTRING_LEN(v);
            if (sizeof(copts->margin) - 2 < (size_t)slen) {
                rb_raise(ox_parse_error_class,
                         ":margin can be no longer than %d characters.",
                         (int)sizeof(copts->margin) - 2);
            }
            strncpy(copts->margin, StringValuePtr(v), sizeof(copts->margin) - 1);
            copts->margin[sizeof(copts->margin) - 1] = '\0';
            copts->margin_len                        = (char)slen;
        }

        for (o = ynos; 0 != o->attr; o++) {
            if (Qnil != (v = rb_hash_lookup(ropts, o->sym))) {
                VALUE c = rb_obj_class(v);

                if (rb_cTrueClass == c) {
                    *o->attr = Yes;
                } else if (rb_cFalseClass == c) {
                    *o->attr = No;
                } else {
                    rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym)));
                }
            }
        }
    }
}

/* call-seq: dump(obj, options) => xml-string
 *
 * Dumps an Object (obj) to a string.
 * - +obj+ [Object] Object to serialize as an XML document String
 * - +options+ [Hash] formating options
 *   - *:indent* [Fixnum] format expected
 *   - *:no_empty* [true|false] if true don't output empty elements
 *   - *:xsd_date* [true|false] use XSD date format if true, default: false
 *   - *:circular* [true|false] allow circular references, default: false
 *   - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
 * :strict
 *     - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
 *     - _:tolerant_ - replaces undumplable objects with nil
 *   - *:with_dtd* [true|false|nil] include DTD in the dump
 *   - *:with_instruct* [true|false|nil] include instructions in the dump
 *   - *:with_xml* [true|false|nil] include XML prolog in the dump
 *
 * Note that an indent of less than zero will result in a tight one line output
 * unless the text in the XML fields contain new line characters.
 */
static VALUE dump(int argc, VALUE *argv, VALUE self) {
    char           *xml;
    struct _options copts = ox_default_options;
    VALUE           rstr;

    if (2 == argc) {
        parse_dump_options(argv[1], &copts);
    }
    if (0 == (xml = ox_write_obj_to_str(*argv, &copts))) {
        rb_raise(rb_eNoMemError, "Not enough memory.\n");
    }
    rstr = rb_str_new2(xml);
    if ('\0' != *copts.encoding) {
        rb_enc_associate(rstr, rb_enc_find(copts.encoding));
    }
    xfree(xml);

    return rstr;
}

/* call-seq: to_xml(obj, options) => xml-string
 *
 * Dumps an Object (obj) to a string.
 * - +obj+ [Object] Object to serialize as an XML document String
 * - +options+ [Hash] formating options
 *   - *:indent* [Fixnum] format expected
 *   - *:no_empty* [true|false] if true don't output empty elements
 *   - *:xsd_date* [true|false] use XSD date format if true, default: false
 *   - *:circular* [true|false] allow circular references, default: false
 *   - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
 * :strict
 *     - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
 *     - _:tolerant_ - replaces undumplable objects with nil
 *   - *:with_dtd* [true|false|nil] include DTD in the dump
 *   - *:with_instruct* [true|false|nil] include instructions in the dump
 *   - *:with_xml* [true|false|nil] include XML prolog in the dump
 *
 * Note that an indent of less than zero will result in a tight one line output
 * unless the text in the XML fields contain new line characters.
 */
static VALUE to_xml(int argc, VALUE *argv, VALUE self) {
    return dump(argc, argv, self);
}

/* call-seq: to_file(file_path, obj, options) => Object
 *
 * Dumps an Object to the specified file.
 * - +file_path+ [String] file path to write the XML document to
 * - +obj+ [Object] Object to serialize as an XML document String
 * - +options+ [Hash] formating options
 *   - *:indent* [Fixnum] format expected
 *   - *:xsd_date* [true|false] use XSD date format if true, default: false
 *   - *:circular* [true|false] allow circular references, default: false
 *   - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
 * :strict
 *     - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
 *     - _:tolerant_ - replaces undumplable objects with nil
 *   - *:with_dtd* [true|false|nil] include DTD in the dump
 *   - *:with_instruct* [true|false|nil] include instructions in the dump
 *   - *:with_xml* [true|false|nil] include XML prolog in the dump
 *
 * Note that an indent of less than zero will result in a tight one line output
 * unless the text in the XML fields contain new line characters.
 */
static VALUE to_file(int argc, VALUE *argv, VALUE self) {
    struct _options copts = ox_default_options;

    if (3 == argc) {
        parse_dump_options(argv[2], &copts);
    }
    Check_Type(*argv, T_STRING);
    ox_write_obj_to_file(argv[1], StringValuePtr(*argv), &copts);

    return Qnil;
}

#if WITH_CACHE_TESTS
extern void ox_cache_test(void);

static VALUE cache_test(VALUE self) {
    ox_cache_test();
    return Qnil;
}

extern void ox_cache8_test(void);

static VALUE cache8_test(VALUE self) {
    ox_cache8_test();
    return Qnil;
}
#endif

void Init_ox(void) {
#if HAVE_RB_EXT_RACTOR_SAFE
    rb_ext_ractor_safe(true);
#endif
    Ox = rb_define_module("Ox");

    rb_define_module_function(Ox, "default_options", get_def_opts, 0);
    rb_define_module_function(Ox, "default_options=", set_def_opts, 1);

    rb_define_module_function(Ox, "parse_obj", to_obj, 1);
    rb_define_module_function(Ox, "parse", to_gen, 1);
    rb_define_module_function(Ox, "load", load_str, -1);
    rb_define_module_function(Ox, "sax_parse", sax_parse, -1);
    rb_define_module_function(Ox, "sax_html", sax_html, -1);

    rb_define_module_function(Ox, "to_xml", to_xml, -1);
    rb_define_module_function(Ox, "dump", dump, -1);

    rb_define_module_function(Ox, "load_file", load_file, -1);
    rb_define_module_function(Ox, "to_file", to_file, -1);

    rb_define_module_function(Ox, "sax_html_overlay", sax_html_overlay, 0);

    ox_init_builder(Ox);

    rb_require("time");
    rb_require("date");
    // rb_require("bigdecimal");
    rb_require("stringio");

    ox_abort_id             = rb_intern("abort");
    ox_at_column_id         = rb_intern("@column");
    ox_at_content_id        = rb_intern("@content");
    ox_at_id                = rb_intern("at");
    ox_at_line_id           = rb_intern("@line");
    ox_at_pos_id            = rb_intern("@pos");
    ox_at_value_id          = rb_intern("@value");
    ox_attr_id              = rb_intern("attr");
    ox_attr_value_id        = rb_intern("attr_value");
    ox_attributes_id        = rb_intern("@attributes");
    ox_attrs_done_id        = rb_intern("attrs_done");
    ox_beg_id               = rb_intern("@beg");
    ox_bigdecimal_id        = rb_intern("BigDecimal");
    ox_call_id              = rb_intern("call");
    ox_cdata_id             = rb_intern("cdata");
    ox_comment_id           = rb_intern("comment");
    ox_den_id               = rb_intern("@den");
    ox_doctype_id           = rb_intern("doctype");
    ox_end_element_id       = rb_intern("end_element");
    ox_end_id               = rb_intern("@end");
    ox_end_instruct_id      = rb_intern("end_instruct");
    ox_error_id             = rb_intern("error");
    ox_excl_id              = rb_intern("@excl");
    ox_external_encoding_id = rb_intern("external_encoding");
    ox_fileno_id            = rb_intern("fileno");
    ox_force_encoding_id    = rb_intern("force_encoding");
    ox_inspect_id           = rb_intern("inspect");
    ox_instruct_id          = rb_intern("instruct");
    ox_jd_id                = rb_intern("jd");
    ox_keys_id              = rb_intern("keys");
    ox_local_id             = rb_intern("local");
    ox_mesg_id              = rb_intern("mesg");
    ox_message_id           = rb_intern("message");
    ox_nodes_id             = rb_intern("@nodes");
    ox_new_id               = rb_intern("new");
    ox_num_id               = rb_intern("@num");
    ox_parse_id             = rb_intern("parse");
    ox_pos_id               = rb_intern("pos");
    ox_read_id              = rb_intern("read");
    ox_readpartial_id       = rb_intern("readpartial");
    ox_start_element_id     = rb_intern("start_element");
    ox_string_id            = rb_intern("string");
    ox_text_id              = rb_intern("text");
    ox_to_c_id              = rb_intern("to_c");
    ox_value_id             = rb_intern("value");

    encoding_id = rb_intern("encoding");
    has_key_id  = rb_intern("has_key?");

    rb_require("ox/version");
    rb_require("ox/error");
    rb_require("ox/hasattrs");
    rb_require("ox/node");
    rb_require("ox/comment");
    rb_require("ox/instruct");
    rb_require("ox/cdata");
    rb_require("ox/doctype");
    rb_require("ox/element");
    rb_require("ox/document");
    rb_require("ox/bag");
    rb_require("ox/sax");

    ox_time_class         = rb_const_get(rb_cObject, rb_intern("Time"));
    ox_date_class         = rb_const_get(rb_cObject, rb_intern("Date"));
    ox_parse_error_class  = rb_const_get_at(Ox, rb_intern("ParseError"));
    ox_syntax_error_class = rb_const_get_at(Ox, rb_intern("SyntaxError"));
    ox_arg_error_class    = rb_const_get_at(Ox, rb_intern("ArgError"));
    ox_struct_class       = rb_const_get(rb_cObject, rb_intern("Struct"));
    ox_stringio_class     = rb_const_get(rb_cObject, rb_intern("StringIO"));

    abort_sym = ID2SYM(rb_intern("abort"));
    rb_gc_register_address(&abort_sym);
    active_sym = ID2SYM(rb_intern("active"));
    rb_gc_register_address(&active_sym);
    attr_key_mod_sym = ID2SYM(rb_intern("attr_key_mod"));
    rb_gc_register_address(&attr_key_mod_sym);
    auto_define_sym = ID2SYM(rb_intern("auto_define"));
    rb_gc_register_address(&auto_define_sym);
    auto_sym = ID2SYM(rb_intern("auto"));
    rb_gc_register_address(&auto_sym);
    block_sym = ID2SYM(rb_intern("block"));
    rb_gc_register_address(&block_sym);
    circular_sym = ID2SYM(rb_intern("circular"));
    rb_gc_register_address(&circular_sym);
    convert_special_sym = ID2SYM(rb_intern("convert_special"));
    rb_gc_register_address(&convert_special_sym);
    effort_sym = ID2SYM(rb_intern("effort"));
    rb_gc_register_address(&effort_sym);
    element_key_mod_sym = ID2SYM(rb_intern("element_key_mod"));
    rb_gc_register_address(&element_key_mod_sym);
    generic_sym = ID2SYM(rb_intern("generic"));
    rb_gc_register_address(&generic_sym);
    hash_no_attrs_sym = ID2SYM(rb_intern("hash_no_attrs"));
    rb_gc_register_address(&hash_no_attrs_sym);
    hash_sym = ID2SYM(rb_intern("hash"));
    rb_gc_register_address(&hash_sym);
    inactive_sym = ID2SYM(rb_intern("inactive"));
    rb_gc_register_address(&inactive_sym);
    invalid_replace_sym = ID2SYM(rb_intern("invalid_replace"));
    rb_gc_register_address(&invalid_replace_sym);
    limited_sym = ID2SYM(rb_intern("limited"));
    rb_gc_register_address(&limited_sym);
    margin_sym = ID2SYM(rb_intern("margin"));
    rb_gc_register_address(&margin_sym);
    mode_sym = ID2SYM(rb_intern("mode"));
    rb_gc_register_address(&mode_sym);
    nest_ok_sym = ID2SYM(rb_intern("nest_ok"));
    rb_gc_register_address(&nest_ok_sym);
    no_empty_sym = ID2SYM(rb_intern("no_empty"));
    rb_gc_register_address(&no_empty_sym);
    object_sym = ID2SYM(rb_intern("object"));
    rb_gc_register_address(&object_sym);
    off_sym = ID2SYM(rb_intern("off"));
    rb_gc_register_address(&off_sym);
    opt_format_sym = ID2SYM(rb_intern("opt_format"));
    rb_gc_register_address(&opt_format_sym);
    optimized_sym = ID2SYM(rb_intern("optimized"));
    rb_gc_register_address(&optimized_sym);
    overlay_sym = ID2SYM(rb_intern("overlay"));
    rb_gc_register_address(&overlay_sym);
    ox_encoding_sym = ID2SYM(rb_intern("encoding"));
    rb_gc_register_address(&ox_encoding_sym);
    ox_indent_sym = ID2SYM(rb_intern("indent"));
    rb_gc_register_address(&ox_indent_sym);
    ox_size_sym = ID2SYM(rb_intern("size"));
    rb_gc_register_address(&ox_size_sym);
    ox_standalone_sym = ID2SYM(rb_intern("standalone"));
    rb_gc_register_address(&ox_standalone_sym);
    ox_version_sym = ID2SYM(rb_intern("version"));
    rb_gc_register_address(&ox_version_sym);
    skip_none_sym = ID2SYM(rb_intern("skip_none"));
    rb_gc_register_address(&skip_none_sym);
    skip_off_sym = ID2SYM(rb_intern("skip_off"));
    rb_gc_register_address(&skip_off_sym);
    skip_return_sym = ID2SYM(rb_intern("skip_return"));
    rb_gc_register_address(&skip_return_sym);
    skip_sym = ID2SYM(rb_intern("skip"));
    rb_gc_register_address(&skip_sym);
    skip_white_sym = ID2SYM(rb_intern("skip_white"));
    rb_gc_register_address(&skip_white_sym);
    smart_sym = ID2SYM(rb_intern("smart"));
    rb_gc_register_address(&smart_sym);
    strict_sym = ID2SYM(rb_intern("strict"));
    rb_gc_register_address(&strict_sym);
    strip_namespace_sym = ID2SYM(rb_intern("strip_namespace"));
    rb_gc_register_address(&strip_namespace_sym);
    symbolize_keys_sym = ID2SYM(rb_intern("symbolize_keys"));
    rb_gc_register_address(&symbolize_keys_sym);
    symbolize_sym = ID2SYM(rb_intern("symbolize"));
    rb_gc_register_address(&symbolize_sym);
    tolerant_sym = ID2SYM(rb_intern("tolerant"));
    rb_gc_register_address(&tolerant_sym);
    trace_sym = ID2SYM(rb_intern("trace"));
    rb_gc_register_address(&trace_sym);
    with_cdata_sym = ID2SYM(rb_intern("with_cdata"));
    rb_gc_register_address(&with_cdata_sym);
    with_dtd_sym = ID2SYM(rb_intern("with_dtd"));
    rb_gc_register_address(&with_dtd_sym);
    with_instruct_sym = ID2SYM(rb_intern("with_instructions"));
    rb_gc_register_address(&with_instruct_sym);
    with_xml_sym = ID2SYM(rb_intern("with_xml"));
    rb_gc_register_address(&with_xml_sym);
    xsd_date_sym = ID2SYM(rb_intern("xsd_date"));
    rb_gc_register_address(&xsd_date_sym);

    ox_empty_string = rb_str_new2("");
    rb_gc_register_address(&ox_empty_string);
    ox_zero_fixnum = INT2NUM(0);
    rb_gc_register_address(&ox_zero_fixnum);
    ox_sym_bank = rb_ary_new();
    rb_gc_register_address(&ox_sym_bank);

    ox_document_clas = rb_const_get_at(Ox, rb_intern("Document"));
    ox_element_clas  = rb_const_get_at(Ox, rb_intern("Element"));
    ox_instruct_clas = rb_const_get_at(Ox, rb_intern("Instruct"));
    ox_comment_clas  = rb_const_get_at(Ox, rb_intern("Comment"));
    ox_raw_clas      = rb_const_get_at(Ox, rb_intern("Raw"));
    ox_doctype_clas  = rb_const_get_at(Ox, rb_intern("DocType"));
    ox_cdata_clas    = rb_const_get_at(Ox, rb_intern("CData"));
    ox_bag_clas      = rb_const_get_at(Ox, rb_intern("Bag"));

    // Classes can move in more recent versions so register them all.
    rb_gc_register_address(&Ox);
    rb_gc_register_address(&ox_arg_error_class);
    rb_gc_register_address(&ox_bag_clas);
    rb_gc_register_address(&ox_bag_clas);
    rb_gc_register_address(&ox_cdata_clas);
    rb_gc_register_address(&ox_cdata_clas);
    rb_gc_register_address(&ox_comment_clas);
    rb_gc_register_address(&ox_comment_clas);
    rb_gc_register_address(&ox_date_class);
    rb_gc_register_address(&ox_doctype_clas);
    rb_gc_register_address(&ox_doctype_clas);
    rb_gc_register_address(&ox_document_clas);
    rb_gc_register_address(&ox_document_clas);
    rb_gc_register_address(&ox_element_clas);
    rb_gc_register_address(&ox_element_clas);
    rb_gc_register_address(&ox_encoding_sym);
    rb_gc_register_address(&ox_indent_sym);
    rb_gc_register_address(&ox_instruct_clas);
    rb_gc_register_address(&ox_instruct_clas);
    rb_gc_register_address(&ox_parse_error_class);
    rb_gc_register_address(&ox_raw_clas);
    rb_gc_register_address(&ox_raw_clas);
    rb_gc_register_address(&ox_size_sym);
    rb_gc_register_address(&ox_standalone_sym);
    rb_gc_register_address(&ox_stringio_class);
    rb_gc_register_address(&ox_struct_class);
    rb_gc_register_address(&ox_syntax_error_class);
    rb_gc_register_address(&ox_time_class);
    rb_gc_register_address(&ox_version_sym);

    slot_cache_new(&ox_class_cache);

    ox_sax_define();
    ox_hash_init();

#if WITH_CACHE_TESTS
    // space added to stop yardoc from trying to document
    rb_define _module_function(Ox, "cache_test", cache_test, 0);
    rb_define _module_function(Ox, "cache8_test", cache8_test, 0);
#endif

    ox_utf8_encoding = rb_enc_find("UTF-8");
}

#if __GNUC__ > 4
_Noreturn void
#else
void
#endif
_ox_raise_error(const char *msg, const char *xml, const char *current, const char *file, int line) {
    int xline = 1;
    int col   = 1;

    for (; xml < current && '\n' != *current; current--) {
        col++;
    }
    for (; xml < current; current--) {
        if ('\n' == *current) {
            xline++;
        }
    }
    rb_gc_enable();
    rb_raise(ox_parse_error_class, "%s at line %d, column %d [%s:%d]\n", msg, xline, col, file, line);
}
