class Ovec::Tier
Constants
- DATE_REGEX
TODO: generally tie “5.~batalion”, … All changes within this regex are changed to a tilde.
- REGEX
The last character this regex matches is changed to a tilde.
Public Instance Methods
run()
click to toggle source
# File lib/ovec/tier.rb, line 22 def run # ~ neni mezi \p{Z}. # TODO: moznosti na dvojpismenne predlozky matches = @joined.to_enum(:scan, REGEX).map { Regexp.last_match } # Matches may overlap, find all matches with more scans. # TODO: optimize until matches.empty? for i in 0...matches.length # TODO: check ze za tim neni tilda nikde (napr. na dalsim radku, par mezer pozdeji, ...) match = matches[i] change = match.end(0) - 1 chunk, offset = _find_chunk_and_offset(change) former_character = chunk[offset] chunk[offset] = '~' if former_character == '\n' # If we changed a newline to a tilde, change previous space to a # newline -- move the tied word to the other line. j = change - 1 while j >= 0 break if @joined[j] == '\n' # Don't cross newlines. if @joined[j] == ' ' chunk, offset = _find_chunk_and_offset(j) chunk[offset] = '\n' break end j -= 1 end end end _rejoin matches = @joined.to_enum(:scan, REGEX).map { Regexp.last_match } end # Dates can't overlap. 1 scan is enough. matches = @joined.to_enum(:scan, DATE_REGEX).map { Regexp.last_match } for match in matches for i in (match.begin(0))...(match.end(0)) if @joined[i] == ' ' chunk, offset = _find_chunk_and_offset(i) chunk[offset] = '~' end end end end