module CsvParser

Public Class Methods

parse_line(str, encoding)
/*
 * Parse the first line of a CSV string into an Array of fields.
 *
 * self     - receiver (unused; required by the Ruby method signature).
 * str      - String holding the CSV text, or nil.
 * encoding - String naming the encoding assigned to every returned field.
 *
 * Returns nil when str is nil or empty, or when the encoding lookup yields
 * NULL. Otherwise returns an Array with one element per field:
 *   - an empty unquoted field becomes nil;
 *   - a quoted field always becomes a String (possibly empty), with a
 *     doubled quote ("") inside it collapsed to a single quote character.
 * Parsing stops at the first \r or \n that is not inside a quoted field.
 * If the input ends while a quoted field is still open, that trailing field
 * is not appended to the result.
 *
 * Raises TypeError if str or encoding cannot be converted to a String, and
 * ArgumentError if encoding contains a NUL byte.
 */
static VALUE parse_line(VALUE self, VALUE str, VALUE encoding)
{
    if (NIL_P(str))
        return Qnil;

    /* Reject non-String input with an exception instead of reading garbage. */
    StringValue(str);

    const char *ptr = RSTRING_PTR(str);
    long len = RSTRING_LEN(str);  /* keep the full width; no truncation to int */

    if (len == 0)
        return Qnil;

    /* Resolve the encoding before allocating anything. */
    rb_encoding *enc = rb_enc_find(StringValueCStr(encoding));
    if (enc == NULL)
        return Qnil;

    VALUE array = rb_ary_new2(DEF_ARRAY_LEN);

    /*
     * Scratch buffer for the field being assembled. A field can never be
     * longer than the line, so len bytes suffice. ALLOCV_N is used instead of
     * a variable-length array so that a long line cannot overflow the C stack.
     */
    VALUE scratch;
    char *value = ALLOCV_N(char, scratch, len);

    int state = UNQUOTED;
    long index = 0;   /* number of bytes currently held in value */
    int done = 0;     /* set once an unquoted line terminator is seen */
    long i;

    for (i = 0; i < len && !done; i++)
    {
        char c = ptr[i];
        switch (c)
        {
            case ',':
                if (state == UNQUOTED) {
                    /* An empty unquoted field is reported as nil. */
                    rb_ary_push(array, (index == 0 ? Qnil : rb_enc_str_new(value, index, enc)));
                    index = 0;
                }
                else if (state == IN_QUOTED) {
                    value[index++] = c;  /* comma is literal inside quotes */
                }
                else if (state == QUOTE_IN_QUOTED) {
                    /* The previous quote closed the field; keep "" as an empty String. */
                    rb_ary_push(array, rb_enc_str_new(value, index, enc));
                    index = 0;
                    state = UNQUOTED;
                }
                break;
            case '"':
                if (state == UNQUOTED) {
                    state = IN_QUOTED;
                }
                else if (state == IN_QUOTED) {
                    /* Either the closing quote or the first half of an escaped quote. */
                    state = QUOTE_IN_QUOTED;
                }
                else if (state == QUOTE_IN_QUOTED) {
                    value[index++] = c;  /* escaped quote */
                    state = IN_QUOTED;
                }
                break;
            case 13:  /* \r */
            case 10:  /* \n */
                if (state == IN_QUOTED) {
                    value[index++] = c;  /* line break is literal inside quotes */
                }
                else {
                    done = 1;  /* only parse first line if multiline */
                }
                break;
            default:
                value[index++] = c;
        }
    }

    /* Flush the last field; a still-open quoted field is intentionally skipped. */
    if (state == UNQUOTED) {
        rb_ary_push(array, (index == 0 ? Qnil : rb_enc_str_new(value, index, enc)));
    }
    else if (state == QUOTE_IN_QUOTED) {
        rb_ary_push(array, rb_enc_str_new(value, index, enc));
    }

    ALLOCV_END(scratch);
    RB_GC_GUARD(str);  /* ptr points into str; keep it alive across allocations */
    return array;
}