=begin

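# Ragel scanner definition for the Halunke tokenizer. The %%{ ... }%% block
# below is the machine specification; a typical build step (assuming this
# file is named tokenizer.rl) would be something like:
#   ragel -R tokenizer.rl -o tokenizer.rb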
%%{

machine tokenizer;

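# Token patterns. A number is an optionally signed integer with an optional
# fractional part; a string is anything between double quotes; an
# unassigned_bareword is an identifier prefixed with a single quote; a
# bareword is a plain identifier or one of the operator symbols.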
number = ('+'|'-')?[0-9]+('.'[0-9]+)?;
string = '"' [^"]* '"';
unassigned_bareword = "'" [a-zA-Z_]+;
bareword = [a-zA-Z_]+ | '+' | '-' | '*' | '/' | '<' | '>' | '=' | '@' | '**';
open_paren = '(';
close_paren = ')';
open_curly = '{';
close_curly = '}';
open_bracket = '[';
close_bracket = ']';
open_dict_bracket = '@[';
start_comment = '/*';
end_comment = '*/';
bar = "|";

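# The scanner emits one token per longest match. Ragel sets ts/te to the
# start and (exclusive) end offset of the match; emit records the end as an
# inclusive index, hence `te - 1`. The string and unassigned_bareword slices
# drop the surrounding quotes / the leading quote.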
main := |*

  number => { emit(:NUMBER, data[ts...te].to_r, ts, te - 1) };
  string => { emit(:STRING, data[ts+1...te-1], ts, te - 1) };
  unassigned_bareword => { emit(:UNASSIGNED_BAREWORD, data[ts+1...te], ts, te - 1) };
  bareword => { emit(:BAREWORD, data[ts...te], ts, te - 1) };
  open_paren => { emit(:OPEN_PAREN, data[ts...te], ts, te - 1) };
  close_paren => { emit(:CLOSE_PAREN, data[ts...te], ts, te - 1) };
  open_curly => { emit(:OPEN_CURLY, data[ts...te], ts, te - 1) };
  close_curly => { emit(:CLOSE_CURLY, data[ts...te], ts, te - 1) };
  open_bracket => { emit(:OPEN_BRACKET, data[ts...te], ts, te - 1) };
  close_bracket => { emit(:CLOSE_BRACKET, data[ts...te], ts, te - 1) };
  open_dict_bracket => { emit(:OPEN_DICT_BRACKET, data[ts...te], ts, te - 1) };
  start_comment => { emit(:START_COMMENT, data[ts...te], ts, te - 1) };
  end_comment => { emit(:END_COMMENT, data[ts...te], ts, te - 1) };
  bar => { emit(:BAR, data[ts...te], ts, te - 1) };
  space;
  any => { raise "Could not lex '#{ data[ts...te] }'" };

*|;

}%%

=end

module Halunke

class Tokenizer
  def initialize
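    # Ragel replaces this directive with the machine's static data tables
    # when the .rl file is compiled.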
    %% write data;
    @tokens = []
  end

  def tokenize(data)
    data.chomp!
    eof = data.length

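    # "write init" sets up the machine's state variables (p, pe, cs, ts, te,
    # act); "write exec" runs the scanner over `data` up to `eof`.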
    %% write init;
    %% write exec;

    @tokens
  end

  private

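  # Tokens are stored as [type, [value, start, end]] pairs: the token type,
  # the lexeme (or converted value), and its inclusive start/end offsets in
  # the source.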
  def emit(type, value, ts, te)
    @tokens << [ type, [ value, ts, te ] ]
  end
end

end
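
# A rough usage sketch (number values are Rationals because of `to_r`);
# the exact tokens depend on the Ragel-generated code:
#
#   Halunke::Tokenizer.new.tokenize("(1 + 2)")
#   # => [[:OPEN_PAREN, ["(", 0, 0]], [:NUMBER, [(1/1), 1, 1]],
#   #     [:BAREWORD, ["+", 3, 3]], [:NUMBER, [(2/1), 5, 5]],
#   #     [:CLOSE_PAREN, [")", 6, 6]]]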