module CSVScan
Public Class Methods
scan(p1)
click to toggle source
/*
 * Arguments handed from csv_scan() to csv_scan_body() through rb_ensure().
 * The struct lives on csv_scan()'s stack for the duration of the call.
 */
struct csv_scan_args {
    VALUE port;             /* object answering #read, or a String */
    unsigned char *buf;     /* scan buffer of buffer_size bytes */
    int buffer_size;
};

/*
 * Ensure handler: releases the scan buffer. The buffer is obtained with
 * ALLOC_N, so it is released with xfree() rather than free().
 */
static VALUE csv_scan_free_buf(VALUE ptr)
{
    xfree( (void *)ptr );
    return Qnil;
}

/*
 * Scan loop: repeatedly fills the buffer from the port, runs the
 * Ragel-generated state machine over it and yields one Array per completed
 * row. Raises CSVParseError when a token does not fit in the buffer or when
 * the machine reaches its error state. Always returns Qnil.
 *
 * NOTE: everything between "#line 87" and "#line 134" is Ragel output kept
 * verbatim (only re-indented); durable changes to it belong in csvscan.rl.
 */
static VALUE csv_scan_body(VALUE argp)
{
    struct csv_scan_args *args = (struct csv_scan_args *)argp;
    VALUE port = args->port;
    unsigned char *buf = args->buf;
    int buffer_size = args->buffer_size;

    int cs, act, have = 0, nread = 0, curline = 1;
    unsigned char *tokstart = NULL, *tokend = NULL;
    VALUE row, coldata;
    int done = 0;

#line 47 "csvscan.c"
    {
    cs = csv_scan_start;
    tokstart = 0;
    tokend = 0;
    act = 0;
    }
#line 102 "csvscan.rl"

    row = rb_ary_new();
    coldata = Qnil;

    while( !done ) {
        VALUE str;
        /* Bytes [buf, buf + have) hold the unfinished token carried over
         * from the previous pass; new data is appended after them. */
        unsigned char *p = buf + have, *pe;
        int len, space = buffer_size - have;

        if ( space == 0 ) {
            /* A single token fills the whole buffer. */
            rb_raise(rb_eCSVParseError, "ran out of buffer on line %d.", curline);
        }

        if ( rb_respond_to( port, s_read ) ) {
            str = rb_funcall( port, s_read, 1, INT2FIX(space) );
        } else {
            str = rb_str_substr( port, nread, space );
        }

        if ( NIL_P(str) ) {
            /* IO#read(n) returns nil at end of input; treat as 0 bytes so
             * that input ending exactly on a buffer boundary terminates
             * cleanly instead of raising TypeError. */
            len = 0;
        } else {
            long slen;

            StringValue(str);
            slen = RSTRING_LEN(str);
            if ( slen > (long)space ) {
                /* A misbehaving #read must not overflow the buffer. */
                rb_raise( rb_eArgError, "bad argument, read returned more bytes than requested." );
            }
            len = (int)slen;
            memcpy( p, RSTRING_PTR(str), len );
        }
        nread += len;

        /* If this is the last buffer, tack on an EOF. */
        if ( len < space ) {
            p[len++] = 0;
            done = 1;
        }

        pe = p + len;

#line 87 "csvscan.c"
    {
    if ( p == pe )
        goto _out;
    switch ( cs )
    {
tr0:
#line 19 "csvscan.rl"
    {tokend = p;{p = ((tokend))-1;}}
    goto st2;
tr1:
#line 10 "csvscan.rl"
    { curline += 1; }
#line 20 "csvscan.rl"
    {
        rb_ary_push(row, coldata);
        rb_yield(row);
        coldata = Qnil;
        row = rb_ary_new();
    }
#line 20 "csvscan.rl"
    {tokend = p+1;{p = ((tokend))-1;}}
    goto st2;
tr2:
#line 49 "csvscan.rl"
    {tokend = p;{
        /* Quoted field: drop the surrounding quotes and collapse each
         * doubled quote to one, rewriting the token in place. */
        unsigned char ch, *start_p, *wptr, *rptr;
        int rest, datalen;
        start_p = wptr = tokstart;
        rptr = tokstart + 1;
        rest = tokend - tokstart - 2;
        datalen = 0;
        while(rest>0) {
            ch = *rptr++;
            if (ch=='"') {
                rptr++;
                rest--;
            }
            *wptr++ = ch;
            datalen++;
            rest--;
        }
        coldata = rb_str_new( start_p, datalen );
    }{p = ((tokend))-1;}}
    goto st2;
tr5:
#line 1 "csvscan.rl"
    {   switch( act ) {
    case 0: tokend = tokstart; {goto st1;}
    case 4:
    {
        /* Unquoted field: strip trailing spaces/tabs; empty becomes nil. */
        unsigned char ch, *endp;
        int datalen;
        datalen = tokend - tokstart;
        endp = tokend - 1;
        while(datalen>0) {
            ch = *endp--;
            if (ch==' ' || ch=='\t') {
                datalen--;
            } else {
                break;
            }
        }
        if (datalen==0) {
            coldata = Qnil;
        } else {
            coldata = rb_str_new(tokstart, datalen);
        }
    }
    break;
    case 5:
    {
        /* Quoted field: same unquoting as in tr2. */
        unsigned char ch, *start_p, *wptr, *rptr;
        int rest, datalen;
        start_p = wptr = tokstart;
        rptr = tokstart + 1;
        rest = tokend - tokstart - 2;
        datalen = 0;
        while(rest>0) {
            ch = *rptr++;
            if (ch=='"') {
                rptr++;
                rest--;
            }
            *wptr++ = ch;
            datalen++;
            rest--;
        }
        coldata = rb_str_new( start_p, datalen );
    }
    break;
    default: break;
    }
    {p = ((tokend))-1;}}
    goto st2;
tr6:
#line 19 "csvscan.rl"
    {tokend = p+1;{p = ((tokend))-1;}}
    goto st2;
tr7:
#line 19 "csvscan.rl"
    {tokend = p+1;{p = ((tokend))-1;}}
#line 10 "csvscan.rl"
    { curline += 1; }
#line 20 "csvscan.rl"
    {
        rb_ary_push(row, coldata);
        rb_yield(row);
        coldata = Qnil;
        row = rb_ary_new();
    }
    goto st2;
tr10:
#line 26 "csvscan.rl"
    {tokend = p+1;{
        rb_ary_push(row, coldata);
        coldata = Qnil;
    }{p = ((tokend))-1;}}
    goto st2;
st2:
#line 1 "csvscan.rl"
    {tokstart = 0;}
#line 1 "csvscan.rl"
    {act = 0;}
    if ( ++p == pe )
        goto _out2;
case 2:
#line 1 "csvscan.rl"
    {tokstart = p;}
#line 220 "csvscan.c"
    switch( (*p) ) {
        case 9u: goto tr6;
        case 10u: goto tr7;
        case 13u: goto st4;
        case 32u: goto tr6;
        case 34u: goto st0;
        case 44u: goto tr10;
    }
    if ( 11u <= (*p) && (*p) <= 12u )
        goto tr8;
    goto tr4;
tr4:
#line 1 "csvscan.rl"
    {tokend = p+1;}
#line 30 "csvscan.rl"
    {act = 4;}
    goto st3;
tr8:
#line 1 "csvscan.rl"
    {tokend = p+1;}
#line 19 "csvscan.rl"
    {act = 1;}
    goto st3;
st3:
    if ( ++p == pe )
        goto _out3;
case 3:
#line 248 "csvscan.c"
    switch( (*p) ) {
        case 10u: goto tr5;
        case 13u: goto tr5;
        case 34u: goto tr5;
        case 44u: goto tr5;
    }
    goto tr4;
st4:
    if ( ++p == pe )
        goto _out4;
case 4:
    if ( (*p) == 10u )
        goto tr1;
    goto tr0;
tr11:
#line 10 "csvscan.rl"
    { curline += 1; }
    goto st0;
st0:
    if ( ++p == pe )
        goto _out0;
case 0:
#line 273 "csvscan.c"
    switch( (*p) ) {
        case 10u: goto tr11;
        case 34u: goto tr12;
    }
    goto st0;
tr12:
#line 1 "csvscan.rl"
    {tokend = p+1;}
#line 49 "csvscan.rl"
    {act = 5;}
    goto st5;
st5:
    if ( ++p == pe )
        goto _out5;
case 5:
#line 289 "csvscan.c"
    if ( (*p) == 34u )
        goto st0;
    goto tr2;
st1:
    goto _out1;
    }
    _out2: cs = 2; goto _out;
    _out3: cs = 3; goto _out;
    _out4: cs = 4; goto _out;
    _out0: cs = 0; goto _out;
    _out5: cs = 5; goto _out;
    _out1: cs = 1; goto _out;

    _out: {}
    }
#line 134 "csvscan.rl"

        if ( cs == csv_scan_error ) {
            /* The buffer is released by the ensure handler. */
            rb_raise(rb_eCSVParseError, "parse error on line %d.", curline);
        }

        if ( tokstart == 0 ) {
            /* No token in progress: the next read may reuse the whole buffer. */
            have = 0;
        } else {
            /* Slide the unfinished token to the front of the buffer and
             * rebase the token pointers onto its new location. */
            have = pe - tokstart;
            memmove( buf, tokstart, have );
            tokend = buf + (tokend - tokstart);
            tokstart = buf;
        }
    }

    return Qnil;
}

/*
 * CSVScan.scan(port) { |row| ... }
 *
 * Parses CSV data from +port+ and yields each row as an Array whose cells
 * are Strings, or nil for empty unquoted cells.
 *
 * port - an object responding to #read (called with a byte count), or an
 *        object responding to #to_str whose String value is scanned.
 *
 * The scan buffer is BUFSIZE bytes unless the receiver has a non-nil
 * @buffer_size instance variable, which is then used instead.
 *
 * Returns nil.
 * Raises ArgumentError if +port+ is neither readable nor string-like, or if
 * #read returns more bytes than requested; raises CSVParseError on malformed
 * input or when a single token does not fit in the buffer.
 *
 * The buffer is released through rb_ensure, so it is freed even when the
 * block or the port raises, or when the block exits early (e.g. break).
 */
VALUE csv_scan(VALUE self, VALUE port)
{
    struct csv_scan_args args;
    VALUE bufsize = Qnil;
    int buffer_size;

    if ( !rb_respond_to( port, s_read ) ) {
        if ( rb_respond_to( port, s_to_str ) ) {
            port = rb_funcall( port, s_to_str, 0 );
            StringValue(port);
        } else {
            rb_raise( rb_eArgError, "bad argument, String or IO only please." );
        }
    }

    buffer_size = BUFSIZE;
    if (rb_ivar_defined(self, rb_intern("@buffer_size")) == Qtrue) {
        bufsize = rb_ivar_get(self, rb_intern("@buffer_size"));
        if (!NIL_P(bufsize)) {
            buffer_size = NUM2INT(bufsize);
        }
    }

    args.port = port;
    args.buffer_size = buffer_size;
    args.buf = ALLOC_N(unsigned char, buffer_size);

    return rb_ensure( csv_scan_body, (VALUE)&args,
                      csv_scan_free_buf, (VALUE)args.buf );
}