class XMLParser

Constants

Error
PARAM_ENTITY_PARSING_ALWAYS
PARAM_ENTITY_PARSING_NEVER
PARAM_ENTITY_PARSING_UNLESS_STANDALONE

Public Class Methods

expatVersion() click to toggle source
endif

/*
 * XMLParser.expatVersion
 *
 * Returns a Ruby String naming the Expat version.  When HAVE_EXPAT_H is
 * defined the string comes from XML_ExpatVersion(); otherwise a fixed
 * literal ("1.2", "1.1" or "1.0") is selected by the EXPAT_1_2 / NEW_EXPAT
 * build macros.  The result is passed through ENC_() before being returned.
 */
static VALUE
XMLParser_s_expatVersion(VALUE obj)
{
  VALUE version;

#if defined(HAVE_EXPAT_H)
  version = rb_str_new2(XML_ExpatVersion());
#elif defined(EXPAT_1_2)
  version = rb_str_new2("1.2");
#elif defined(NEW_EXPAT)
  version = rb_str_new2("1.1");
#else
  version = rb_str_new2("1.0");
#endif

  return ENC_(version);
}
getFeatureList() click to toggle source
Available only when the extension is compiled with HAVE_XML_GETFEATURELIST defined.
/*
 * XMLParser.getFeatureList
 *
 * Walks the array returned by XML_GetFeatureList() until an entry whose
 * `feature` field is zero, and returns a Hash mapping each entry's name
 * (a String passed through ENC_() and FO_()) to its integer value.
 */
static VALUE
XMLParser_s_getFeatureList(VALUE obj)
{
  VALUE features = rb_hash_new();
  const XML_Feature* entry;

  for (entry = XML_GetFeatureList(); entry && entry->feature; entry++) {
    VALUE name = FO_(ENC_(rb_str_new2(entry->name)));

    rb_hash_aset(features, name, INT2NUM(entry->value));
  }

  return features;
}
new(p1 = v1, p2 = v2, p3 = v3) click to toggle source

constructor

/*
 * XMLParser.new(...)
 *
 * Allocates the Ruby wrapper object together with its Expat parser.
 * The call shape is chosen from the argument count and the type of the
 * first argument:
 *
 *   new()                           plain parser, no explicit encoding
 *   new(encoding)                   encoding is a String or nil
 *   new(encoding, nschar)           NEW_EXPAT only, taken when arg1 is not
 *                                   T_DATA; creates a namespace parser using
 *                                   the first byte of nschar as separator
 *   new(parser, context)            external-entity parser derived from an
 *                                   existing parser; context may be nil
 *   new(parser, context, encoding)  same, with an explicit encoding String
 *
 * Arguments are validated with Check_Type.  Raises eXMLParserError
 * ("cannot create parser") when Expat returns a NULL parser.  After the
 * internal fields are initialised, the object's `initialize` is invoked
 * with the original arguments via rb_obj_call_init().
 */
static VALUE
XMLParser_new(int argc, VALUE* argv, VALUE klass)
{
  XMLParser* parser;
  VALUE obj;
  VALUE arg1;
  VALUE arg2;
  VALUE arg3;
  int count;
  char* encoding = NULL;
#ifdef NEW_EXPAT
  char* nssep = NULL;
#endif
  char* context = NULL;
  XMLParser* rootparser = NULL;  /* non-NULL selects the external-entity path */
  VALUE parent = Qnil;

  count = rb_scan_args(argc, argv, "03", &arg1, &arg2, &arg3);
  if (count == 1) {
    /* new(encoding) */
    if (TYPE(arg1) != T_NIL) {
      Check_Type(arg1, T_STRING); /* encoding */
      encoding = RSTRING_PTR(arg1);
    }
  }
  else if (count == 2) {
    /* new(encoding, nschar) */
    /* new(parser, context) */
    /* With NEW_EXPAT the two forms are told apart by whether arg1 is T_DATA;
       without it only the (parser, context) form exists. */
#ifdef NEW_EXPAT
    if (TYPE(arg1) != T_DATA) {
      if (TYPE(arg1) != T_NIL) {
        Check_Type(arg1, T_STRING); /* encoding */
        encoding = RSTRING_PTR(arg1);
      }
      Check_Type(arg2, T_STRING); /* nschar */
      nssep = RSTRING_PTR(arg2);
    }
    else {
#endif
      Check_Type(arg1, T_DATA); /* parser */
      GET_PARSER(arg1, rootparser);
      if (!NIL_P(arg2)) {
        Check_Type(arg2, T_STRING); /* context */
        context = RSTRING_PTR(arg2);
      }
      parent = arg1;
#ifdef NEW_EXPAT
    }
#endif
  }
  else if (count == 3) {
    /* new(parser, context, encoding) */
    Check_Type(arg1, T_DATA); /* parser */
    GET_PARSER(arg1, rootparser);
    if (!NIL_P(arg2)) {
      Check_Type(arg2, T_STRING); /* context */
      context = RSTRING_PTR(arg2);
    }
    Check_Type(arg3, T_STRING); /* encoding */
    encoding = RSTRING_PTR(arg3);
    parent = arg1;
  }

  /* create object */
  obj = Data_Make_Struct(klass, XMLParser,
                         XMLParser_mark, XMLParser_free, parser);
  /* create parser */
  if (rootparser == NULL) {
#ifdef NEW_EXPAT
    /* only the first byte of nschar is used as the namespace separator */
    if (nssep == NULL)
      parser->parser = XML_ParserCreate(encoding);
    else
      parser->parser = XML_ParserCreateNS(encoding, nssep[0]);
#else
    parser->parser = XML_ParserCreate(encoding);
#endif
    parser->tainted = 0;
    parser->context = NULL;
  }
  else {
    parser->parser = XML_ExternalEntityParserCreate(rootparser->parser,
                                                    context, encoding);
    /* clear all inherited handlers,
       because handlers should be set in "parse" method  */
    XML_SetElementHandler(parser->parser, NULL, NULL);
    XML_SetCharacterDataHandler(parser->parser, NULL);
    XML_SetProcessingInstructionHandler(parser->parser, NULL);
    XML_SetDefaultHandler(parser->parser, NULL);
    XML_SetUnparsedEntityDeclHandler(parser->parser, NULL);
    XML_SetNotationDeclHandler(parser->parser, NULL);
    XML_SetExternalEntityRefHandler(parser->parser, NULL);
#ifdef NEW_EXPAT
    XML_SetCommentHandler(parser->parser, NULL);
    XML_SetCdataSectionHandler(parser->parser, NULL, NULL);
    XML_SetNamespaceDeclHandler(parser->parser, NULL, NULL);
    XML_SetNotStandaloneHandler(parser->parser, NULL);
#endif
#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
    XML_SetDoctypeDeclHandler(parser->parser, NULL, NULL);
#endif
#ifdef HAVE_EXPAT_H
    XML_SetElementDeclHandler(parser->parser, NULL);
    XML_SetAttlistDeclHandler(parser->parser, NULL);
    XML_SetXmlDeclHandler(parser->parser, NULL);
    XML_SetEntityDeclHandler(parser->parser, NULL);
#endif
#if 0
    XML_SetExternalParsedEntityDeclHandler(parser->parser, NULL);
    XML_SetInternalParsedEntityDeclHandler(parser->parser, NULL);
#endif
#ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
    XML_SetSkippedEntityHandler(parser->parser, NULL);
#endif
    /* Propagate the parent's taint flag.  NOTE(review): `tainted` is never
       assigned on this path, so the |= relies on the value left by
       Data_Make_Struct (presumably zero-filled) -- confirm. */
    if (rootparser->tainted)
      parser->tainted |= 1;
    /* NOTE(review): this keeps a raw pointer into arg2's string buffer;
       nothing in this function keeps arg2 itself referenced -- confirm the
       pointer cannot outlive the String. */
    parser->context = context;
  }
  /* NULL result from either creation call above */
  if (!parser->parser)
    rb_raise(eXMLParserError, "cannot create parser");

  /* setting up internal data */
  /* the Ruby object itself is stored as Expat's user data */
  XML_SetUserData(parser->parser, (void*)obj);
  parser->iterator = 0;
  parser->defaultCurrent = 0;
#ifdef NEW_EXPAT
  parser->lastAttrs = NULL;
#endif
  parser->parent = parent;  /* Qnil unless derived from another parser */
  parser->detectedEncoding = NULL;

  rb_obj_call_init(obj, argc, argv);

  return obj;
}

Public Instance Methods

byteCount() click to toggle source
endif

/*
 * parser.byteCount
 *
 * Returns, as a Fixnum, whatever XML_GetCurrentByteCount() reports for
 * this object's Expat parser.
 */
static VALUE
XMLParser_getCurrentByteCount(VALUE obj)
{
  XMLParser* parser;
  int nbytes;

  GET_PARSER(obj, parser);
  nbytes = XML_GetCurrentByteCount(parser->parser);

  return INT2FIX(nbytes);
}
byteIndex() click to toggle source

byte index method

/*
 * parser.byteIndex
 *
 * Returns the value of XML_GetCurrentByteIndex() for this object's Expat
 * parser as a Ruby Integer.
 *
 * The index is held in a `long`, so it is converted with LONG2NUM rather
 * than INT2FIX: INT2FIX performs no range check and would yield a wrong
 * value for an index outside the Fixnum range (possible where `long` is
 * 32 bits wide), whereas LONG2NUM falls back to a Bignum when needed.
 */
static VALUE
XMLParser_getCurrentByteIndex(VALUE obj)
{
  XMLParser* parser;
  long pos;

  GET_PARSER(obj, parser);
  pos = XML_GetCurrentByteIndex(parser->parser);

  return LONG2NUM(pos);
}
column() click to toggle source

column method

/*
 * parser.column
 *
 * Returns, as a Fixnum, the result of XML_GetCurrentColumnNumber() for
 * this object's Expat parser, narrowed to int first.
 */
static VALUE
XMLParser_getCurrentColumnNumber(VALUE obj)
{
  XMLParser* parser;

  GET_PARSER(obj, parser);

  return INT2FIX((int)XML_GetCurrentColumnNumber(parser->parser));
}
defaultCurrent() click to toggle source

defaultCurrent method

/*
 * parser.defaultCurrent
 *
 * When the parser's `iterator` flag is set, only records the request by
 * setting `defaultCurrent` to 1 (the code that consumes the flag is not in
 * this function).  Otherwise calls XML_DefaultCurrent() right away.
 * Always returns nil.
 */
static VALUE
XMLParser_defaultCurrent(VALUE obj)
{
  XMLParser* parser;

  GET_PARSER(obj, parser);
  if (parser->iterator) {
    parser->defaultCurrent = 1;
    return Qnil;
  }

  XML_DefaultCurrent(parser->parser);
  return Qnil;
}
done() click to toggle source

done method

/*
 * parser.done
 *
 * Frees the underlying Expat parser with XML_ParserFree() and clears the
 * stored pointer so that a second call does nothing.  Always returns nil.
 */
static VALUE
XMLParser_done(VALUE obj)
{
  XMLParser* parser;

  GET_PARSER(obj, parser);
  if (parser->parser == NULL)
    return Qnil;  /* already released */

  XML_ParserFree(parser->parser);
  parser->parser = NULL;

  return Qnil;
}
getBase() click to toggle source

get URI base

/*
 * parser.getBase
 *
 * Returns the string reported by XML_GetBase() as a Ruby String (passed
 * through ENC_() and TO_()), or nil when XML_GetBase() returns NULL.
 */
static VALUE
XMLParser_getBase(VALUE obj)
{
  XMLParser* parser;
  const XML_Char* base;

  GET_PARSER(obj, parser);
  base = XML_GetBase(parser->parser);
  if (base != NULL)
    return TO_(ENC_(rb_str_new2((char*)base)));

  return Qnil;
}
getIdAttribute() click to toggle source
/*
 * parser.getIdAttribute
 *
 * Looks up the index returned by XML_GetIdAttributeIndex() in the
 * parser's `lastAttrs` array and returns that entry as a Ruby String
 * (passed through ENC_() and TO_()).  Returns nil when `lastAttrs` is
 * NULL or when the index is negative.
 */
static VALUE
XMLParser_getIdAttrribute(VALUE obj)
{
  XMLParser* parser;
  const XML_Char** attrs;
  int idx;

  GET_PARSER(obj, parser);
  attrs = parser->lastAttrs;
  if (attrs == NULL)
    return Qnil;

  idx = XML_GetIdAttributeIndex(parser->parser);
  if (idx >= 0)
    return TO_(ENC_(rb_str_new2((char*)attrs[idx])));

  return Qnil;
}
getInputContext() click to toggle source
/*
 * parser.getInputContext
 *
 * Calls XML_GetInputContext() and, when it yields a non-NULL buffer with
 * a positive size, returns a two-element Array: the buffer contents as a
 * String of `size` bytes (passed through ENC_() and TO_()) and the
 * reported offset as a Fixnum.  Returns nil otherwise.
 */
static VALUE
XMLParser_getInputContext(VALUE obj)
{
  XMLParser* parser;
  const char* ctx;
  int offset;
  int size;

  GET_PARSER(obj, parser);
  ctx = XML_GetInputContext(parser->parser, &offset, &size);
  if (ctx == NULL || size <= 0)
    return Qnil;

  return rb_ary_new3(2,
                     TO_(ENC_(rb_str_new(ctx, size))),
                     INT2FIX(offset));
}
getSpecifiedAttributes() click to toggle source
0
/*
 * parser.getSpecifiedAttributes
 *
 * Walks the parser's `lastAttrs` array two entries at a time until a NULL
 * entry, treating the first of each pair as an attribute name.  Returns a
 * Hash whose keys are those names (Strings passed through ENC_(), TO_()
 * and FO_()).  The value is true for the first N names, where N is
 * XML_GetSpecifiedAttributeCount() / 2, and false for the remainder.
 * Returns nil when `lastAttrs` is NULL.
 */
static VALUE
XMLParser_getSpecifiedAttributes(VALUE obj)
{
  XMLParser* parser;
  const XML_Char** cursor;
  int remaining;
  VALUE result;

  GET_PARSER(obj, parser);
  cursor = parser->lastAttrs;
  if (cursor == NULL)
    return Qnil;

  remaining = XML_GetSpecifiedAttributeCount(parser->parser) / 2;
  result = rb_hash_new();
  for (; *cursor; cursor += 2) {
    VALUE name = FO_(TO_(ENC_(rb_str_new2((char*)cursor[0]))));
    VALUE specified = (remaining > 0) ? Qtrue : Qfalse;

    remaining--;
    rb_hash_aset(result, name, specified);
  }

  return result;
}
line() click to toggle source

line method

/*
 * parser.line
 *
 * Returns, as a Fixnum, the result of XML_GetCurrentLineNumber() for this
 * object's Expat parser, narrowed to int first.
 */
static VALUE
XMLParser_getCurrentLineNumber(VALUE obj)
{
  XMLParser* parser;

  GET_PARSER(obj, parser);

  return INT2FIX((int)XML_GetCurrentLineNumber(parser->parser));
}
parse(p1 = v1, p2 = v2) click to toggle source

parse method

/*
 * parser.parse(str = nil, isFinal = true)
 *
 * Feeds input to the Expat parser after (re)installing the event handlers.
 *
 *  - str is not a String but has a public `gets`: treated as a stream.
 *    `gets` is called repeatedly; each returned String is parsed with the
 *    final flag 0, and a nil result triggers one closing XML_Parse call
 *    with the final flag 1.  The isFinal argument is not consulted on
 *    this path.
 *  - str is a String: parsed in one XML_Parse call using isFinal.  When
 *    built with HAVE_RUBY_ENCODING_H and HAVE_XML_PARSERRESET, a final
 *    parse that ends with XML_ERROR_UNKNOWN_ENCODING is retried once after
 *    transcoding the input (see the comments in that branch).
 *  - str is nil: XML_Parse is called with a NULL buffer and isFinal.
 *
 * isFinal must be true or false when given, otherwise TypeError
 * ("not valid value") is raised.  A failed XML_Parse raises
 * eXMLParserError carrying Expat's error string.  Returns nil.
 */
static VALUE
XMLParser_parse(int argc, VALUE* argv, VALUE obj)
{
  XMLParser* parser;
  int ret;
  VALUE str;
  VALUE isFinal;
  int final = 1;  /* default when isFinal is omitted */
  int count;
  int fromStream = 0;
  ID mid = rb_intern("gets");
  ID linebuf = rb_intern("_linebuf");

  count = rb_scan_args(argc, argv, "02", &str, &isFinal);
  /* If "str" has public "gets" method, it will be considered *stream* */
  if (!rb_obj_is_kind_of(str, rb_cString) &&
      rb_method_boundp(CLASS_OF(str), mid, 1)) {
    fromStream = 1;
  }
  else if (!NIL_P(str)) {
    Check_Type(str, T_STRING);
  }
  if (count >= 2) {
    if (isFinal == Qtrue)
      final = 1;
    else if (isFinal == Qfalse)
      final = 0;
    else
      rb_raise(rb_eTypeError, "not valid value");
  }

  GET_PARSER(obj, parser);

  /* remember whether a block was passed to this call */
  parser->iterator = rb_block_given_p();

  /* Setup event handlers */
  setup_evnet_handlers(parser, obj);

  /* Parse from stream (probably slightly slow) */
  if (fromStream) {
    VALUE buf;

    if (OBJ_TAINTED(str))
      taintParser(parser);
    do {
      buf = rb_funcall(str, mid, 0);
      if (!NIL_P(buf)) {
        Check_Type(buf, T_STRING);
        if (OBJ_TAINTED(buf))
          taintParser(parser);
        rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/
        ret = XML_Parse(parser->parser,
                        RSTRING_PTR(buf), RSTRING_LEN(buf), 0);
      }
      else {
        /* end of stream: tell Expat this is the final chunk */
        ret = XML_Parse(parser->parser, NULL, 0, 1);
      }
      if (!ret) {
        int err = XML_GetErrorCode(parser->parser);
        const char* errStr = XML_ErrorString(err);
        rb_raise(eXMLParserError, "%s", errStr);
      }
    } while (!NIL_P(buf));
    return Qnil;
  }

  /* Parse string */
  if (!NIL_P(str)) {
#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
    int err;
#endif
    if (OBJ_TAINTED(str))
      taintParser(parser);
    ret = XML_Parse(parser->parser,
                    RSTRING_PTR(str), RSTRING_LEN(str), final);
#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
    /* Ruby 1.9.1 Encoding conversion */
    /* Retry path: if Expat did not know the document encoding, look the
       name up as a Ruby encoding, transcode the input to enc_xml, reset
       the parser to "utf-8" and parse the converted text instead. */
    err = XML_GetErrorCode(parser->parser);
    if (final && err == XML_ERROR_UNKNOWN_ENCODING) {
      rb_encoding* enc;
      volatile VALUE encobj;
      volatile VALUE ustr;
      /* NOTE(review): detectedEncoding is not checked for NULL here;
         presumably it is filled in by a handler installed in
         setup_evnet_handlers before this error can occur -- confirm. */
      enc = rb_enc_find(parser->detectedEncoding);
      /* skip the retry when the lookup yields the ASCII-8BIT index */
      if ((int)rb_enc_to_index(enc) != rb_ascii8bit_encindex()) {
        /* NOTE(review): this re-tags the caller's String object in place */
        rb_enc_associate(str, enc);
        encobj = rb_enc_from_encoding(enc_xml);
        /* rb_str_encode may raise an exception */
        ustr = rb_str_encode(str, encobj, 0, Qnil);
        if (!NIL_P(ustr)) {
          /* the reset is followed by re-doing the per-parse setup:
             user data, bookkeeping fields and event handlers */
          XML_ParserReset(parser->parser, "utf-8");
          XML_SetUserData(parser->parser, (void*)obj);
          parser->defaultCurrent = 0;
#ifdef NEW_EXPAT
          parser->lastAttrs = NULL;
#endif
          parser->detectedEncoding = NULL;
          setup_evnet_handlers(parser, obj);
          ret = XML_Parse(parser->parser,
                          RSTRING_PTR(ustr), RSTRING_LEN(ustr), final);
        }
      }
    }
#endif
  }
  else
    ret = XML_Parse(parser->parser, NULL, 0, final);
  /* covers both the String and the nil path (and the retry, if taken) */
  if (!ret) {
    int err = XML_GetErrorCode(parser->parser);
    const char* errStr = XML_ErrorString(err);
    rb_raise(eXMLParserError, "%s", errStr);
  }

  return Qnil;
}
reset(p1 = v1) click to toggle source
Available only when the extension is compiled with HAVE_XML_PARSERRESET defined.
/*
 * parser.reset(encoding = nil)
 *
 * Resets the underlying Expat parser with XML_ParserReset(), passing the
 * given encoding String (or NULL when it is omitted or nil), then
 * re-registers the Ruby object as Expat user data and clears the wrapper's
 * bookkeeping fields (iterator, defaultCurrent, lastAttrs under NEW_EXPAT,
 * tainted, detectedEncoding).  A non-nil, non-String encoding is rejected
 * by Check_Type.  Returns the receiver.
 */
static VALUE
XMLParser_reset(int argc, VALUE* argv, VALUE obj)
{
  XMLParser* parser;
  VALUE enc_arg = Qnil;
  char* enc_name = NULL;
  int nargs = rb_scan_args(argc, argv, "01", &enc_arg);

  GET_PARSER(obj, parser);
  if (nargs > 0 && !NIL_P(enc_arg)) {
    Check_Type(enc_arg, T_STRING);
    enc_name = RSTRING_PTR(enc_arg);
  }
  XML_ParserReset(parser->parser, enc_name);

  /* restore the per-object state */
  XML_SetUserData(parser->parser, (void*)obj);
  parser->detectedEncoding = NULL;
  parser->tainted = 0;
#ifdef NEW_EXPAT
  parser->lastAttrs = NULL;
#endif
  parser->defaultCurrent = 0;
  parser->iterator = 0;

  return obj;
}
setBase(p1) click to toggle source

set URI base

/*
 * parser.setBase(base)
 *
 * Passes `base` to XML_SetBase() and returns its integer result as a
 * Fixnum.  If `base` is tainted, the parser is marked tainted as well.
 *
 * `base` must be a String (enforced by Check_Type).  The C pointer is
 * obtained with StringValueCStr rather than RSTRING_PTR, because
 * XML_SetBase() expects a NUL-terminated string: StringValueCStr guarantees
 * termination and raises ArgumentError when the String contains an
 * embedded NUL byte, instead of silently passing a truncated value.
 * The conversion is done before the parser is touched so that a rejected
 * argument leaves the parser state unchanged.
 */
static VALUE
XMLParser_setBase(VALUE obj, VALUE base)
{
  XMLParser* parser;
  const char* cbase;
  int ret;

  Check_Type(base, T_STRING);
  GET_PARSER(obj, parser);
  cbase = StringValueCStr(base);
  if (OBJ_TAINTED(base))
    taintParser(parser);
  ret = XML_SetBase(parser->parser, cbase);

  return INT2FIX(ret);
}
setParamEntityParsing(p1) click to toggle source
Available only when the extension is compiled with XML_DTD defined.
/*
 * parser.setParamEntityParsing(parsing)
 *
 * Forwards the Fixnum `parsing` to XML_SetParamEntityParsing() and returns
 * that call's integer result as a Fixnum.  A non-Fixnum argument is
 * rejected by Check_Type.
 */
static VALUE
XMLParser_setParamEntityParsing(VALUE obj, VALUE parsing)
{
  XMLParser* parser;
  int mode;
  int status;

  Check_Type(parsing, T_FIXNUM);
  GET_PARSER(obj, parser);
  mode = FIX2INT(parsing);
  status = XML_SetParamEntityParsing(parser->parser, mode);

  return INT2FIX(status);
}
setReturnNSTriplet(p1) click to toggle source
Available only when the extension is compiled with HAVE_EXPAT_H defined.
/*
 * parser.setReturnNSTriplet(do_nst)
 *
 * Converts `do_nst` to an int (true -> 1, false -> 0, Fixnum -> its value)
 * and passes it to XML_SetReturnNSTriplet().  Any other argument raises
 * TypeError ("not valid value").  Returns nil.
 */
static VALUE
XMLParser_setReturnNSTriplet(VALUE obj, VALUE do_nst)
{
  XMLParser* parser;
  int nst;

  GET_PARSER(obj, parser);
  if (do_nst == Qtrue) {
    nst = 1;
  }
  else if (do_nst == Qfalse) {
    nst = 0;
  }
  else if (FIXNUM_P(do_nst)) {
    nst = FIX2INT(do_nst);
  }
  else {
    rb_raise(rb_eTypeError, "not valid value");
  }
  XML_SetReturnNSTriplet(parser->parser, nst);

  return Qnil;
}
useForeignDTD(p1) click to toggle source
Available only when the extension is compiled with HAVE_XML_USEFOREIGNDTD defined.
/*
 * parser.useForeignDTD(useDTD)
 *
 * Converts `useDTD` to an int (true -> 1, false -> 0, Fixnum -> its value),
 * passes it to XML_UseForeignDTD() and returns that call's integer result
 * as a Fixnum.  Any other argument raises TypeError ("not valid value").
 */
static VALUE
XMLParser_useForeignDTD(VALUE obj, VALUE useDTD)
{
  XMLParser* parser;
  int dtd;
  int status;

  GET_PARSER(obj, parser);
  if (useDTD == Qtrue) {
    dtd = 1;
  }
  else if (useDTD == Qfalse) {
    dtd = 0;
  }
  else if (FIXNUM_P(useDTD)) {
    dtd = FIX2INT(useDTD);
  }
  else {
    rb_raise(rb_eTypeError, "not valid value");
  }
  status = XML_UseForeignDTD(parser->parser, dtd);

  return INT2FIX(status);
}